[PATCH, ivopts] Handle type-converted IVs better

Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
Thu Sep 21 12:46:00 GMT 2006


Hello,

> Zdenek Dvorak writes:
> > I overestimated the complexity of the problem; OK, you may then ignore
> > part 1) and just take care of 2) -- something like the following patch.
> 
> I see.  Thanks.
> 
> I had to change two things:
> 
> 1. If the candidate is incremented after the use statement, cbase needs
> to be stepped by one (cbase + cstep).  In this case common_wider_type
> fails to recognize the new form.  Now I set common_type earlier, which
> works even better because:
> 
> 2. common_type, rather than utype, should now be checked against the
> representational limitation.
> 
> I'm adding a test for 2. to the testsuite.
> 
> Tested on mipsisa64-elf, bootstrapped and tested on x86_64-linux.
> 
> Does this look OK to you?

it looks fine to me, ...

> OK to install?

... I cannot approve the patches, though.

Zdenek

> Adam
> 
> 
> 	* tree-ssa-loop-ivopts.c (aff_combination_convert,
> 	determine_common_wider_type): New functions.
> 	(get_computation_aff): Use them to simplify arithmetic between
> 	UBASE and CBASE if they are shortened from the same type.
> 
> 
> 	* gcc.dg/tree-ssa/ivopts-1.c: New test.
> 
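A small aside, not part of the patch: the reason the difference may safely be
computed in the wider common type is that all the arithmetic is performed in
UUTYPE in the end anyway, and unsigned truncation commutes with subtraction.
A stand-alone sketch of that fact, with made-up values:

  #include <assert.h>

  int
  main (void)
  {
    /* UB and CB play the role of the wider-typed values the two bases
       were shortened from; ub and cb are their narrowed counterparts.  */
    unsigned long long UB = 0x100000007ULL, CB = 0x200000003ULL;
    unsigned int ub = (unsigned int) UB, cb = (unsigned int) CB;

    /* Converting the wider difference once gives the same value as
       subtracting the already-converted bases.  */
    assert ((unsigned int) (UB - CB) == ub - cb);
    return 0;
  }

This is the property the new comment in get_computation_aff below appeals to
when it says overflows need not be worried about.
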
> Index: tree-ssa-loop-ivopts.c
> ===================================================================
> *** tree-ssa-loop-ivopts.c	(revision 116991)
> --- tree-ssa-loop-ivopts.c	(working copy)
> *************** aff_combination_add (struct affine_tree_
> *** 2762,2767 ****
> --- 2762,2798 ----
>       aff_combination_add_elt (comb1, comb2->rest, 1);
>   }
>   
> + /* Convert COMB to TYPE.  */
> + 
> + static void
> + aff_combination_convert (tree type, struct affine_tree_combination *comb)
> + {
> +   unsigned prec = TYPE_PRECISION (type);
> +   unsigned i;
> + 
> +   /* If the precision of both types is the same, it suffices to change the type
> +      of the whole combination -- the elements are allowed to have another type
> +      equivalent wrto STRIP_NOPS.  */
> +   if (prec == TYPE_PRECISION (comb->type))
> +     {
> +       comb->type = type;
> +       return;
> +     }
> + 
> +   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
> +   comb->offset = comb->offset & comb->mask;
> + 
> +   /* The type of the elements can be different from comb->type only as
> +      much as what STRIP_NOPS would remove.  We can just directly cast
> +      to TYPE.  */
> +   for (i = 0; i < comb->n; i++)
> +     comb->elts[i] = fold_convert (type, comb->elts[i]);
> +   if (comb->rest)
> +     comb->rest = fold_convert (type, comb->rest);
> + 
> +   comb->type = type;
> + }
> + 
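
A remark on the mask computation above, as an illustration only and not part
of the patch: the shift amount prec - 1 stays strictly below the width of
unsigned HOST_WIDE_INT, so the expression remains well defined even when PREC
equals HOST_BITS_PER_WIDE_INT, where the more obvious (1 << prec) - 1 would
shift by the full word width.  A stand-alone sketch using uint64_t in place
of unsigned HOST_WIDE_INT:

  #include <assert.h>
  #include <stdint.h>

  /* Same shape as the mask expression in aff_combination_convert.  */
  static uint64_t
  low_bits_mask (unsigned prec)
  {
    return ((uint64_t) 2 << (prec - 1)) - 1;
  }

  int
  main (void)
  {
    assert (low_bits_mask (16) == 0xffffULL);
    assert (low_bits_mask (64) == 0xffffffffffffffffULL);  /* full-width case */
    return 0;
  }
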
>   /* Splits EXPR into an affine combination of parts.  */
>   
>   static void
> *************** fold_affine_expr (tree expr)
> *** 2951,2956 ****
> --- 2982,3025 ----
>     return aff_combination_to_tree (&comb);
>   }
>   
> + /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
> +    same precision that is at least as wide as the precision of TYPE, stores
> +    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
> +    type of A and B.  */
> + 
> + static tree
> + determine_common_wider_type (tree *a, tree *b)
> + {
> +   tree wider_type = NULL;
> +   tree suba, subb;
> +   tree atype = TREE_TYPE (*a);
> + 
> +   if ((TREE_CODE (*a) == NOP_EXPR
> +        || TREE_CODE (*a) == CONVERT_EXPR))
> +     {
> +       suba = TREE_OPERAND (*a, 0);
> +       wider_type = TREE_TYPE (suba);
> +       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
> + 	return atype;
> +     }
> +   else
> +     return atype;
> + 
> +   if ((TREE_CODE (*b) == NOP_EXPR
> +        || TREE_CODE (*b) == CONVERT_EXPR))
> +     {
> +       subb = TREE_OPERAND (*b, 0);
> +       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
> + 	return atype;
> +     }
> +   else
> +     return atype;
> + 
> +   *a = suba;
> +   *b = subb;
> +   return wider_type;
> + }
> + 
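
To make the pattern concrete, a hedged source-level sketch of what stripping
the conversions buys (the names are made up; the function above of course
works on trees): once both bases are rewritten in terms of their common
wider-typed origin, the subtraction cancels the shared part in the wider
type, and the single narrowing conversion at the end preserves the result:

  #include <assert.h>

  /* Illustration only: x stands for the wider-typed value both bases were
     shortened from, i.e. ubase ~ (unsigned int) (x + 4) and
     cbase ~ (unsigned int) x.  */
  static unsigned int
  difference_in_wider_type (unsigned long long x)
  {
    /* Arranged the way the patched get_computation_aff does it: subtract
       in the wider type, where the shared x cancels, then convert once.  */
    return (unsigned int) ((x + 4) - x);
  }

  int
  main (void)
  {
    assert (difference_in_wider_type (0) == 4);
    assert (difference_in_wider_type (~0ULL) == 4);  /* wrap-around is fine */
    return 0;
  }
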
>   /* Determines the expression by that USE is expressed from induction variable
>      CAND at statement AT in LOOP.  The expression is stored in a decomposed
>      form into AFF.  Returns false if USE cannot be expressed using CAND.  */
> *************** get_computation_aff (struct loop *loop,
> *** 2965,2970 ****
> --- 3034,3040 ----
>     tree cbase = cand->iv->base;
>     tree cstep = cand->iv->step;
>     tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
> +   tree common_type;
>     tree uutype;
>     tree expr, delta;
>     tree ratio;
> *************** get_computation_aff (struct loop *loop,
> *** 3040,3048 ****
>   	ratioi = 0;
>       }
>   
>     /* We may need to shift the value if we are after the increment.  */
>     if (stmt_after_increment (loop, cand, at))
> !     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
>   
>     /* use = ubase - ratio * cbase + ratio * var.
>   
> --- 3110,3129 ----
>   	ratioi = 0;
>       }
>   
> +   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
> +      type, we achieve better folding by computing their difference in this
> +      wider type, and cast the result to UUTYPE.  We do not need to worry about
> +      overflows, as all the arithmetics will in the end be performed in UUTYPE
> +      anyway.  */
> +   common_type = determine_common_wider_type (&ubase, &cbase);
> + 
>     /* We may need to shift the value if we are after the increment.  */
>     if (stmt_after_increment (loop, cand, at))
> !     {
> !       if (uutype != common_type)
> ! 	cstep = fold_convert (common_type, cstep);
> !       cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
> !     }
>   
>     /* use = ubase - ratio * cbase + ratio * var.
>   
> *************** get_computation_aff (struct loop *loop,
> *** 3052,3058 ****
>        happen, fold is able to apply the distributive law to obtain this form
>        anyway.  */
>   
> !   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
>       {
>         /* Let's compute in trees and just return the result in AFF.  This case
>   	 should not be very common, and fold itself is not that bad either,
> --- 3133,3139 ----
>        happen, fold is able to apply the distributive law to obtain this form
>        anyway.  */
>   
> !   if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
>       {
>         /* Let's compute in trees and just return the result in AFF.  This case
>   	 should not be very common, and fold itself is not that bad either,
> *************** get_computation_aff (struct loop *loop,
> *** 3060,3077 ****
>   	 is not that urgent.  */
>         if (ratioi == 1)
>   	{
> ! 	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
>   	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
>   	}
>         else if (ratioi == -1)
>   	{
> ! 	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
>   	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
>   	}
>         else
>   	{
> ! 	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
> ! 	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
>   	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
>   	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
>   	}
> --- 3141,3164 ----
>   	 is not that urgent.  */
>         if (ratioi == 1)
>   	{
> ! 	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
> ! 	  if (uutype != common_type)
> ! 	    delta = fold_convert (uutype, delta);
>   	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
>   	}
>         else if (ratioi == -1)
>   	{
> ! 	  delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
> ! 	  if (uutype != common_type)
> ! 	    delta = fold_convert (uutype, delta);
>   	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
>   	}
>         else
>   	{
> ! 	  delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
> ! 	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
> ! 	  if (uutype != common_type)
> ! 	    delta = fold_convert (uutype, delta);
>   	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
>   	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
>   	}
> *************** get_computation_aff (struct loop *loop,
> *** 3088,3099 ****
>        possible to compute ratioi.  */
>     gcc_assert (ratioi);
>   
> !   tree_to_aff_combination (ubase, uutype, aff);
> !   tree_to_aff_combination (cbase, uutype, &cbase_aff);
>     tree_to_aff_combination (expr, uutype, &expr_aff);
>     aff_combination_scale (&cbase_aff, -ratioi);
>     aff_combination_scale (&expr_aff, ratioi);
>     aff_combination_add (aff, &cbase_aff);
>     aff_combination_add (aff, &expr_aff);
>   
>     return true;
> --- 3175,3188 ----
>        possible to compute ratioi.  */
>     gcc_assert (ratioi);
>   
> !   tree_to_aff_combination (ubase, common_type, aff);
> !   tree_to_aff_combination (cbase, common_type, &cbase_aff);
>     tree_to_aff_combination (expr, uutype, &expr_aff);
>     aff_combination_scale (&cbase_aff, -ratioi);
>     aff_combination_scale (&expr_aff, ratioi);
>     aff_combination_add (aff, &cbase_aff);
> +   if (common_type != uutype)
> +     aff_combination_convert (uutype, aff);
>     aff_combination_add (aff, &expr_aff);
>   
>     return true;
> Index: testsuite/gcc.dg/tree-ssa/ivopts-1.c
> ===================================================================
> *** testsuite/gcc.dg/tree-ssa/ivopts-1.c	(revision 0)
> --- testsuite/gcc.dg/tree-ssa/ivopts-1.c	(revision 0)
> ***************
> *** 0 ****
> --- 1,27 ----
> + /* { dg-do compile } */
> + /* { dg-options "-O2" } */
> + 
> + /* Not all platforms support TImode integers.  */
> + #if defined(__LP64__) && !defined(__hppa__)
> + typedef int TItype __attribute__ ((mode (TI)));
> + #else
> + typedef long TItype;
> + #endif
> + 
> + TItype last_data_offset ;
> + int store;
> + char *data;
> + 
> + f ()
> + {
> + 
> +   TItype data_offset = last_data_offset;
> +   char *p;
> + 
> +   for (p = data; *p; p++)
> +     {
> +       data_offset++;
> +       g (data_offset);
> +       store = data_offset + 1;
> +     }
> + }


