Re: [PATCH, ivopts] Handle type-converted IVs better


Zdenek Dvorak writes:
> I overestimated complexity of the problem; OK, you may then ignore part
> 1), and just care about 2) -- something like the following patch.

I see.  Thanks.

I had to change two things:

1. If the candidate is incremented after the use statement, CBASE needs
to be advanced by one step (CSTEP added to it).  The PLUS_EXPR this
creates is no longer a plain conversion, so determine_common_wider_type
fails to recognize the form.  I now determine the common type before
this adjustment, which works even better because:

2. The representational limit (TYPE_PRECISION against
HOST_BITS_PER_WIDE_INT) must now be checked on COMMON_TYPE instead of
UUTYPE.

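To illustrate what this buys (a sketch with made-up names, 'long'
standing in for the common wider type and 'int' for UUTYPE): if

	ubase = (int) ul;	/* ul and cl have type long */
	cbase = (int) cl;

then computing the difference as (int) (ul - cl) rather than as
(int) ul - (int) cl gives fold a much better chance to simplify; and
since all the arithmetic is performed in UUTYPE in the end, doing the
subtraction in the wider type cannot introduce overflow problems.
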
I'm adding a test for 2) to the testsuite.

Tested on mipsisa64-elf, bootstrapped and tested on x86_64-linux.

Does this look OK to you?

OK to install?

Adam


	* tree-ssa-loop-ivopts.c (aff_combination_convert,
	determine_common_wider_type): New functions.
	(get_computation_aff): Use them to simplify arithmetic between
	UBASE and CBASE if they are shortened from the same type.


	* gcc.dg/tree-ssa/ivopts-1.c: New test.

Index: tree-ssa-loop-ivopts.c
===================================================================
*** tree-ssa-loop-ivopts.c	(revision 116991)
--- tree-ssa-loop-ivopts.c	(working copy)
*************** aff_combination_add (struct affine_tree_
*** 2762,2767 ****
--- 2762,2798 ----
      aff_combination_add_elt (comb1, comb2->rest, 1);
  }
  
+ /* Convert COMB to TYPE.  */
+ 
+ static void
+ aff_combination_convert (tree type, struct affine_tree_combination *comb)
+ {
+   unsigned prec = TYPE_PRECISION (type);
+   unsigned i;
+ 
+   /* If the precision of both types is the same, it suffices to change the
+      type of the whole combination -- the elements are allowed to have
+      another type equivalent w.r.t. STRIP_NOPS.  */
+   if (prec == TYPE_PRECISION (comb->type))
+     {
+       comb->type = type;
+       return;
+     }
+ 
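+   /* Note on the shift below (an editorial aside): writing
+      2 << (prec - 1) instead of 1 << prec keeps the shift amount
+      strictly below the width of unsigned HOST_WIDE_INT, avoiding
+      undefined behavior when PREC equals that width.  */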
+   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
+   comb->offset = comb->offset & comb->mask;
+ 
+   /* The type of the elements can be different from comb->type only as
+      much as what STRIP_NOPS would remove.  We can just directly cast
+      to TYPE.  */
+   for (i = 0; i < comb->n; i++)
+     comb->elts[i] = fold_convert (type, comb->elts[i]);
+   if (comb->rest)
+     comb->rest = fold_convert (type, comb->rest);
+ 
+   comb->type = type;
+ }
+ 
  /* Splits EXPR into an affine combination of parts.  */
  
  static void
*************** fold_affine_expr (tree expr)
*** 2951,2956 ****
--- 2982,3025 ----
    return aff_combination_to_tree (&comb);
  }
  
+ /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
+    same precision that is at least as wide as the precision of TYPE, stores
+    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
+    type of A and B.  */
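+ 
+ /* For example (illustrative only): if *A is (int) x and *B is (int) y,
+    where x and y both have type 'long', then x is stored to *A, y to *B,
+    and 'long' is returned, so the caller can perform its arithmetic in
+    'long' and only then truncate to 'int'.  */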
+ 
+ static tree
+ determine_common_wider_type (tree *a, tree *b)
+ {
+   tree wider_type = NULL;
+   tree suba, subb;
+   tree atype = TREE_TYPE (*a);
+ 
+   if ((TREE_CODE (*a) == NOP_EXPR
+        || TREE_CODE (*a) == CONVERT_EXPR))
+     {
+       suba = TREE_OPERAND (*a, 0);
+       wider_type = TREE_TYPE (suba);
+       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
+ 	return atype;
+     }
+   else
+     return atype;
+ 
+   if ((TREE_CODE (*b) == NOP_EXPR
+        || TREE_CODE (*b) == CONVERT_EXPR))
+     {
+       subb = TREE_OPERAND (*b, 0);
+       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
+ 	return atype;
+     }
+   else
+     return atype;
+ 
+   *a = suba;
+   *b = subb;
+   return wider_type;
+ }
+ 
  /* Determines the expression by that USE is expressed from induction variable
     CAND at statement AT in LOOP.  The expression is stored in a decomposed
     form into AFF.  Returns false if USE cannot be expressed using CAND.  */
*************** get_computation_aff (struct loop *loop,
*** 2965,2970 ****
--- 3034,3040 ----
    tree cbase = cand->iv->base;
    tree cstep = cand->iv->step;
    tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
+   tree common_type;
    tree uutype;
    tree expr, delta;
    tree ratio;
*************** get_computation_aff (struct loop *loop,
*** 3040,3048 ****
  	ratioi = 0;
      }
  
    /* We may need to shift the value if we are after the increment.  */
    if (stmt_after_increment (loop, cand, at))
!     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
  
    /* use = ubase - ratio * cbase + ratio * var.
  
--- 3110,3129 ----
  	ratioi = 0;
      }
  
+   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
+      type, we achieve better folding by computing their difference in this
+      wider type, and casting the result to UUTYPE.  We do not need to worry
+      about overflows, as all the arithmetic will in the end be performed in
+      UUTYPE anyway.  */
+   common_type = determine_common_wider_type (&ubase, &cbase);
+ 
    /* We may need to shift the value if we are after the increment.  */
    if (stmt_after_increment (loop, cand, at))
!     {
!       if (uutype != common_type)
! 	cstep = fold_convert (common_type, cstep);
!       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
!     }
  
    /* use = ubase - ratio * cbase + ratio * var.
  
*************** get_computation_aff (struct loop *loop,
*** 3052,3058 ****
       happen, fold is able to apply the distributive law to obtain this form
       anyway.  */
  
!   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
      {
        /* Let's compute in trees and just return the result in AFF.  This case
  	 should not be very common, and fold itself is not that bad either,
--- 3133,3139 ----
       happen, fold is able to apply the distributive law to obtain this form
       anyway.  */
  
!   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
      {
        /* Let's compute in trees and just return the result in AFF.  This case
  	 should not be very common, and fold itself is not that bad either,
*************** get_computation_aff (struct loop *loop,
*** 3060,3077 ****
  	 is not that urgent.  */
        if (ratioi == 1)
  	{
! 	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
  	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
  	}
        else if (ratioi == -1)
  	{
! 	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
  	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
  	}
        else
  	{
! 	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
! 	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
  	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
  	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
  	}
--- 3141,3164 ----
  	 is not that urgent.  */
        if (ratioi == 1)
  	{
! 	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
! 	  if (uutype != common_type)
! 	    delta = fold_convert (uutype, delta);
  	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
  	}
        else if (ratioi == -1)
  	{
! 	  delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
! 	  if (uutype != common_type)
! 	    delta = fold_convert (uutype, delta);
  	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
  	}
        else
  	{
! 	  delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
! 	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
! 	  if (uutype != common_type)
! 	    delta = fold_convert (uutype, delta);
  	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
  	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
  	}
*************** get_computation_aff (struct loop *loop,
*** 3088,3099 ****
       possible to compute ratioi.  */
    gcc_assert (ratioi);
  
!   tree_to_aff_combination (ubase, uutype, aff);
!   tree_to_aff_combination (cbase, uutype, &cbase_aff);
    tree_to_aff_combination (expr, uutype, &expr_aff);
    aff_combination_scale (&cbase_aff, -ratioi);
    aff_combination_scale (&expr_aff, ratioi);
    aff_combination_add (aff, &cbase_aff);
    aff_combination_add (aff, &expr_aff);
  
    return true;
--- 3175,3188 ----
       possible to compute ratioi.  */
    gcc_assert (ratioi);
  
!   tree_to_aff_combination (ubase, common_type, aff);
!   tree_to_aff_combination (cbase, common_type, &cbase_aff);
    tree_to_aff_combination (expr, uutype, &expr_aff);
    aff_combination_scale (&cbase_aff, -ratioi);
    aff_combination_scale (&expr_aff, ratioi);
    aff_combination_add (aff, &cbase_aff);
+   if (common_type != uutype)
+     aff_combination_convert (uutype, aff);
    aff_combination_add (aff, &expr_aff);
  
    return true;
Index: testsuite/gcc.dg/tree-ssa/ivopts-1.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/ivopts-1.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/ivopts-1.c	(revision 0)
***************
*** 0 ****
--- 1,27 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2" } */
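+ 
+ /* On LP64 targets the TImode induction variable makes the common
+    type's precision exceed HOST_BITS_PER_WIDE_INT, so get_computation_aff
+    has to take its tree-folding fallback path using the common type.  */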
+ 
+ /* Not all platforms support TImode integers.  */
+ #if defined(__LP64__) && !defined(__hppa__)
+ typedef int TItype __attribute__ ((mode (TI)));
+ #else
+ typedef long TItype;
+ #endif
+ 
+ TItype last_data_offset;
+ int store;
+ char *data;
+ 
+ void g (TItype);
+ void f (void)
+ {
+   TItype data_offset = last_data_offset;
+   char *p;
+ 
+   for (p = data; *p; p++)
+     {
+       data_offset++;
+       g (data_offset);
+       store = data_offset + 1;
+     }
+ }

