This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: wide-int, tree


On Sat, Nov 23, 2013 at 8:22 PM, Mike Stump <mikestump@comcast.net> wrote:
> Richi has asked that we break up the wide-int patch so that the individual port and front end maintainers can review their parts without having to go through the entire patch.  This patch covers the tree code.
>
> Ok?

diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
index 86f90d8..961b9a6 100644
--- a/gcc/tree-affine.h
+++ b/gcc/tree-affine.h
@@ -30,7 +32,7 @@ struct aff_comb_elt
   tree val;

   /* Its coefficient in the combination.  */
-  double_int coef;
+  widest_int coef;
 };

 typedef struct affine_tree_combination
@@ -39,7 +41,7 @@ typedef struct affine_tree_combination
   tree type;

   /* Constant offset.  */
-  double_int offset;
+  widest_int offset;

   /* Number of elements of the combination.  */
   unsigned n;

This increases the size of aff_tree from 232 to 808 bytes.  Most of that is
waste, killing cache locality.  For now you are lucky that aff_tree mostly
lives on the stack (though I had patches to make LIM cache them, as
creating them is expensive...).

Not objecting to the change at this very moment, but we desperately
need some external storage solution for these kinds of uses.
Oh, and shrink that widest_int ... (x86_64 and XImode ...)

diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c
index afc1a6a..91a14e4 100644
--- a/gcc/tree-chrec.c
+++ b/gcc/tree-chrec.c
@@ -479,7 +479,6 @@ chrec_fold_multiply (tree type,
 static tree
 tree_fold_binomial (tree type, tree n, unsigned int k)
 {
-  double_int num, denom, idx, di_res;
   bool overflow;
   unsigned int i;
   tree res;
@@ -491,20 +490,20 @@ tree_fold_binomial (tree type, tree n, unsigned int k)
     return fold_convert (type, n);

   /* Numerator = n.  */
-  num = TREE_INT_CST (n);
+  wide_int num = n;

   /* Check that k <= n.  */
-  if (num.ult (double_int::from_uhwi (k)))
+  if (wi::ltu_p (num, k))
     return NULL_TREE;

compare_tree_int (n, k) < 0 may be cheaper here
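That is, something like this (a sketch, untested; compare_tree_int
returns a negative value when its tree operand is smaller than the
constant):

  /* Check that k <= n without building a wide_int (sketch, untested).  */
  if (compare_tree_int (n, k) < 0)
    return NULL_TREE;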

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 0d3c66c..f257d52 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -217,6 +217,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 #include "tree-affine.h"
 #include "tree-inline.h"
+#include "wide-int-print.h"

 /* The maximum number of iterations between the considered memory
    references.  */
@@ -244,7 +245,7 @@ typedef struct dref_d
   unsigned distance;

   /* Number of iterations offset from the first reference in the component.  */
-  double_int offset;
+  widest_int offset;

Another instance of the same size/storage issue.

diff --git a/gcc/tree-streamer-in.c b/gcc/tree-streamer-in.c
index 560d4f8..a70c767 100644
--- a/gcc/tree-streamer-in.c
+++ b/gcc/tree-streamer-in.c
@@ -147,8 +147,9 @@ unpack_ts_base_value_fields (struct bitpack_d *bp, tree expr)
 static void
 unpack_ts_int_cst_value_fields (struct bitpack_d *bp, tree expr)
 {
-  TREE_INT_CST_LOW (expr) = bp_unpack_var_len_unsigned (bp);
-  TREE_INT_CST_HIGH (expr) = bp_unpack_var_len_int (bp);
+  int i;
+  for (i = 0; i < TREE_INT_CST_EXT_NUNITS (expr); i++)
+    TREE_INT_CST_ELT (expr, i) = bp_unpack_var_len_int (bp);
 }


that's less than optimal - only the very last used word should use
var_len_int, the others should use unsigned.
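Roughly like this, as a sketch (untested; the writer side,
pack_ts_int_cst_value_fields, would have to change to match):

  static void
  unpack_ts_int_cst_value_fields (struct bitpack_d *bp, tree expr)
  {
    int i, len = TREE_INT_CST_EXT_NUNITS (expr);
    /* All but the topmost used word carry no sign information ...  */
    for (i = 0; i < len - 1; i++)
      TREE_INT_CST_ELT (expr, i) = bp_unpack_var_len_unsigned (bp);
    /* ... only the last word needs the signed variable-length encoding.  */
    TREE_INT_CST_ELT (expr, len - 1) = bp_unpack_var_len_int (bp);
  }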

@@ -958,6 +961,12 @@ streamer_write_tree_header (struct output_block *ob, tree expr)
     streamer_write_uhwi (ob, BINFO_N_BASE_BINFOS (expr));
   else if (TREE_CODE (expr) == CALL_EXPR)
     streamer_write_uhwi (ob, call_expr_nargs (expr));
+  else if (CODE_CONTAINS_STRUCT (code, TS_INT_CST))
+    {
+      gcc_checking_assert (TREE_INT_CST_NUNITS (expr));
+      streamer_write_uhwi (ob, TREE_INT_CST_NUNITS (expr));
+      streamer_write_uhwi (ob, TREE_INT_CST_EXT_NUNITS (expr));
+    }

isn't ext_nunits always either nunits or nunits + 1?  So it should be
sufficient to write a single bit in the tree_base bitpack and only
write the nunits that are required for the actual allocation in
write_tree_header, not both.
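I.e. something like (sketch only; the extra bitpack flag is made up here):

  /* In pack/unpack_ts_base_value_fields, one extra bit saying whether
     EXT_NUNITS is NUNITS + 1.  */
  bp_pack_value (bp, TREE_INT_CST_EXT_NUNITS (expr)
                     > TREE_INT_CST_NUNITS (expr), 1);

  /* ... and in streamer_write_tree_header only:  */
  else if (CODE_CONTAINS_STRUCT (code, TS_INT_CST))
    streamer_write_uhwi (ob, TREE_INT_CST_NUNITS (expr));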

@@ -967,9 +976,16 @@ streamer_write_tree_header (struct output_block *ob, tree expr)
 void
 streamer_write_integer_cst (struct output_block *ob, tree cst, bool ref_p)
 {
+  int i;
+  int len = TREE_INT_CST_NUNITS (cst);
   gcc_assert (!TREE_OVERFLOW (cst));
   streamer_write_record_start (ob, LTO_integer_cst);
   stream_write_tree (ob, TREE_TYPE (cst), ref_p);
-  streamer_write_uhwi (ob, TREE_INT_CST_LOW (cst));
-  streamer_write_hwi (ob, TREE_INT_CST_HIGH (cst));
+  /* We're effectively streaming a non-sign-extended wide_int here,
+     so there's no need to stream TREE_INT_CST_EXT_NUNITS or any
+     array members beyond LEN.  We'll recreate the tree from the
+     wide_int and the type.  */
+  streamer_write_uhwi (ob, len);
+  for (i = 0; i < len; i++)
+    streamer_write_hwi (ob, TREE_INT_CST_ELT (cst, i));
 }

and that's duplicate writing of TREE_INT_CST_NUNITS ('len').

diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 494d48e..fc0b574 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -445,7 +445,13 @@ emit_case_bit_tests (gimple swtch, tree index_expr,
         if (const & csui) goto target  */
   for (k = 0; k < count; k++)
     {
-      tmp = build_int_cst_wide (word_type_node, test[k].lo, test[k].hi);
+      HOST_WIDE_INT a[2];
+
+      a[0] = test[k].lo;
+      a[1] = test[k].hi;
+      tmp = wide_int_to_tree (word_type_node,
+                             wide_int::from_array (a, 2,
+                                                   TYPE_PRECISION (word_type_node)));
       tmp = fold_build2 (BIT_AND_EXPR, word_type_node, csui, tmp);
       tmp = force_gimple_operand_gsi (&gsi, tmp,
                                      /*simple=*/true, NULL_TREE,

and that's truly odd code - it seems to expect that two HWIs fit
word_type_node.  Completely wrong on i?86 for example.

Please add a /* ??? */ comment here at least.
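Something along the lines of:

      /* ??? This assumes that two HOST_WIDE_INTs cover the precision of
         word_type_node, which is not true e.g. on i?86.  */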

@@ -885,7 +891,7 @@ build_constructors (gimple swtch, struct switch_conv_info *info)
              info->constructors[k]->quick_push (elt);
            }

-         pos = int_const_binop (PLUS_EXPR, pos, integer_one_node);
+         pos = int_const_binop (PLUS_EXPR, pos, build_int_cst (TREE_TYPE (pos), 1));
        }
       gcc_assert (tree_int_cst_equal (pos, CASE_LOW (cs)));

reminds me that we want a HWI overload for int_const_binop ;)  Not
sure if wide_int_to_tree (TREE_TYPE (pos), wi::add (pos, 1)) is better
than that though.
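The overload I have in mind would just be (hypothetical sketch, not part
of the patch):

  /* Hypothetical convenience overload; the existing int_const_binop
     takes two trees.  */
  static inline tree
  int_const_binop (enum tree_code code, const_tree arg1, HOST_WIDE_INT arg2)
  {
    return int_const_binop (code, arg1,
                            build_int_cst (TREE_TYPE (arg1), arg2));
  }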

diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 4a24c66..f3e0ffe 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1141,15 +1142,7 @@ operand_less_p (tree val, tree val2)
 {
   /* LT is folded faster than GE and others.  Inline the common case.  */
   if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
-    {
-      if (TYPE_UNSIGNED (TREE_TYPE (val)))
-       return INT_CST_LT_UNSIGNED (val, val2);
-      else
-       {
-         if (INT_CST_LT (val, val2))
-           return 1;
-       }
-    }
+    return INT_CST_LT (val, val2);
   else
     {
       tree tcmp;

Note that INT_CST_LT and INT_CST_LT_UNSIGNED were supposed to
be very fast.  Now you made them

#define INT_CST_LT(A, B) (wi::lts_p (wi::to_widest (A), wi::to_widest (B)))

which a) doesn't look the same to me and b) hides complexity.

So why not write

   if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
     {
       if (TYPE_UNSIGNED (TREE_TYPE (val)))
         return wi::ltu_p (val, val2);
       else
         return wi::lts_p (val, val2);
     }

?

@@ -6966,12 +6975,7 @@ tree_int_cst_lt (const_tree t1, const_tree t2)
 int
 tree_int_cst_compare (const_tree t1, const_tree t2)
 {
-  if (tree_int_cst_lt (t1, t2))
-    return -1;
-  else if (tree_int_cst_lt (t2, t1))
-    return 1;
-  else
-    return 0;
+  return wi::cmps (wi::to_widest (t1), wi::to_widest (t2));
 }

 /* Return true if T is an INTEGER_CST whose numerical value (extended

How does this work for comparing UHWI_MAX to 0 for example?  Looking
at

template <typename T1, typename T2>
inline int
wi::cmps (const T1 &x, const T2 &y)
{
  unsigned int precision = get_binary_precision (x, y);
  WIDE_INT_REF_FOR (T1) xi (x, precision);
  WIDE_INT_REF_FOR (T2) yi (y, precision);
  if (precision <= HOST_BITS_PER_WIDE_INT)
    {
      HOST_WIDE_INT xl = xi.to_shwi ();
      HOST_WIDE_INT yl = yi.to_shwi ();
      if (xl < yl)
        return -1;

this seems to happily return -1 instead of 1?  Similar issues elsewhere
where you change compares to unconditionally perform signed compares
(it's at least non-obvious to me).

Ah, the to_widest () will make it XImode * 4 extended and thus
get_precision returns 2048 (or so) so we don't take the shortcut
(which means it's slow).

Shouldn't the shortcuts be taken on 'len' == 1 rather than
precision <= HWI?
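That is, something like this in wi::cmps (sketch, untested):

  if (xi.len == 1 && yi.len == 1)
    {
      /* With a single (sign-extended) HWI block on both sides a plain
         signed HOST_WIDE_INT compare is correct whatever the precision.  */
      HOST_WIDE_INT xl = xi.to_shwi ();
      HOST_WIDE_INT yl = yi.to_shwi ();
      if (xl < yl)
        return -1;
      else if (xl > yl)
        return 1;
      return 0;
    }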

Side-note: we have too many ways to compare trees / integers

@@ -8600,18 +8577,7 @@ retry:
   /* Check if c <= type_high_bound.  */
   if (type_high_bound && TREE_CODE (type_high_bound) == INTEGER_CST)
     {
-      dd = tree_to_double_int (type_high_bound);
-      if (unsc != TYPE_UNSIGNED (TREE_TYPE (type_high_bound)))
-       {
-         int c_neg = (!unsc && dc.is_negative ());
-         int t_neg = (unsc && dd.is_negative ());
-
-         if (t_neg && !c_neg)
-           return false;
-         if ((t_neg || !c_neg) && dc.ugt (dd))
-           return false;
-       }
-      else if (dc.cmp (dd, unsc) > 0)
+      if (INT_CST_LT (type_high_bound, c))
        return false;

please eliminate INT_CST_LT in favor of tree_int_cst_lt (which just
calls INT_CST_LT).  And INT_CST_LE as well.


The rest of the patch looks ok to me.  I suggest producing patches
against the wide-int branch that address the review comments.

Thanks,
Richard.

