This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



[PATCH] Fix PR83518, track partial definitions for VN


This implements tracking of partial definitions in value-numbering
so that we can handle

typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2)));
v2si foo (int *a)
{
  a[0] = 1;
  a[1] = 2;
  return *(v2si *)a;
}

and value-number the load to { 1, 2 }.
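
For reference, the byte-level combination can be pictured with the
following standalone snippet (a little-endian sketch just illustrating
the semantics, not the VN code itself):

typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2)));

int main ()
{
  /* Byte image of the two partial defs a[0] = 1 and a[1] = 2, in
     target memory order, as native_encode_expr would produce it.  */
  unsigned char buffer[sizeof (v2si)];
  int one = 1, two = 2;
  __builtin_memcpy (buffer, &one, sizeof (int));
  __builtin_memcpy (buffer + sizeof (int), &two, sizeof (int));

  /* Reinterpret the combined bytes in the vector type - this is
     what native_interpret_expr does for the value-numbered load.  */
  v2si v;
  __builtin_memcpy (&v, buffer, sizeof (v));
  __builtin_printf ("{ %d, %d }\n", v[0], v[1]);  /* { 1, 2 }  */
  return 0;
}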

This happens for example for the testcase in the PR where there
is a vectorized tail that can be constant folded after unrolling
of a non-vectorized head.

We track partial definitions in two pieces - first, a vector
of partial defs that can be native_encoded is collected; replaying
it in reverse order makes overlapping defs resolve to the closest
definition.  Second, a splay tree is used to track the range(s)
those partial defs cover so we can stop once we have gathered
enough of them.
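
To make the bookkeeping concrete, here is a rough standalone sketch
of the range merging, with std::map standing in for libiberty's
splay tree (offsets/sizes in bytes relative to the start of the
looked-up access; this mirrors the logic in push_partial_def below
but is not the patch code):

#include <map>
#include <iterator>

/* offset -> size, both in bytes relative to the access start.  */
static std::map<long, long> known_ranges;

/* Record [offset, offset + size) and return true once the whole
   access [0, access_size) is covered.  */
static bool
record_range (long offset, long size, long access_size)
{
  auto it = known_ranges.upper_bound (offset);
  bool merged = false;
  /* Merge with a predecessor that overlaps or is adjacent - the
     patch expresses adjacency via the r->size + 1 overlap test.  */
  if (it != known_ranges.begin ())
    {
      auto pred = std::prev (it);
      if (pred->first + pred->second >= offset)
	{
	  if (offset + size > pred->first + pred->second)
	    pred->second = offset + size - pred->first;
	  it = pred;
	  merged = true;
	}
    }
  if (!merged)
    it = known_ranges.emplace (offset, size).first;

  /* Swallow successor ranges the grown range now touches.  */
  auto next = std::next (it);
  while (next != known_ranges.end ()
	 && it->first + it->second >= next->first)
    {
      if (next->first + next->second > it->first + it->second)
	it->second = next->first + next->second - it->first;
      next = known_ranges.erase (next);
    }
  return it->first == 0 && it->second >= access_size;
}

In the patch the first range is additionally kept outside of the
tree so the common case of few defs avoids the splay-tree setup
entirely.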

The patch only implements byte-aligned/sized constant partial
defs for now, but it should be possible to extend it to non-constant
partial defs, for example to value-number a load to a
vector constructor - not too useful in itself but enabling
followup simplifications inside VN.  It should also be possible
to extend this to cover bitfields, with a bit of twiddling in
the native_encode/interpret glue since that works on byte
granularity only.
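
For illustration, handling a sub-byte partial def would need glue
along the following lines before the def fits the byte-granular
buffer (a hypothetical little-endian sketch, not part of the patch):

/* Place the low BITSIZE bits of VALUE at bit position BITPOS of
   BUFFER, one bit at a time - native_encode_expr cannot express
   this today since it works on whole bytes.  */
static void
insert_bits (unsigned char *buffer, unsigned long long value,
	     unsigned bitpos, unsigned bitsize)
{
  for (unsigned i = 0; i < bitsize; ++i)
    {
      unsigned pos = bitpos + i;
      unsigned char mask = (unsigned char) 1 << (pos % 8);
      if ((value >> i) & 1)
	buffer[pos / 8] |= mask;
      else
	buffer[pos / 8] &= ~mask;
    }
}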

I do plan to look at the non-constant (vector/complex) defs, but
not at the bitfield ones.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Re-testing with graphite disabled (too noisy at the moment) and after
committing all the prerequisites.

Comments?

Thanks,
Richard.

2019-07-05  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83518
	* tree-ssa-sccvn.c: Include splay-tree.h.
	(struct pd_range, struct pd_data): New.
	(struct vn_walk_cb_data): Add data to track partial definitions.
	(vn_walk_cb_data::~vn_walk_cb_data): New.
	(vn_walk_cb_data::push_partial_def): New.
	(pd_tree_alloc, pd_tree_dealloc, pd_range_compare): New.
	(vn_reference_lookup_2): When partial defs are registered give up.
	(vn_reference_lookup_3): Track partial defs for memset and
	constructor zeroing and for defs from constants.

	* gcc.dg/tree-ssa/ssa-fre-73.c: New testcase.
	* gcc.dg/tree-ssa/ssa-fre-74.c: Likewise.
	* gcc.dg/tree-ssa/ssa-fre-75.c: Likewise.
	* gcc.dg/tree-ssa/ssa-fre-76.c: Likewise.
	* g++.dg/tree-ssa/pr83518.C: Likewise.

Index: gcc/tree-ssa-sccvn.c
===================================================================
--- gcc/tree-ssa-sccvn.c	(revision 273136)
+++ gcc/tree-ssa-sccvn.c	(working copy)
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "splay-tree.h"
 #include "backend.h"
 #include "rtl.h"
 #include "tree.h"
@@ -360,6 +361,8 @@ static void init_vn_nary_op_from_stmt (v
 static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int,
 					 enum tree_code, tree, tree *);
 static tree vn_lookup_simplify_result (gimple_match_op *);
+static vn_reference_t vn_reference_lookup_or_insert_for_pieces
+	  (tree, alias_set_type, tree, vec<vn_reference_op_s, va_heap>, tree);
 
 /* Return whether there is value numbering information for a given SSA name.  */
 
@@ -1646,20 +1649,245 @@ vn_reference_lookup_1 (vn_reference_t vr
   return NULL_TREE;
 }
 
+
+/* Partial definition tracking support.  */
+
+struct pd_range
+{
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT size;
+};
+
+struct pd_data
+{
+  tree rhs;
+  HOST_WIDE_INT offset;
+  HOST_WIDE_INT size;
+};
+
+/* Context for alias walking.  */
+
 struct vn_walk_cb_data
 {
   vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_,
-                   vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
+		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
     : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_),
-      tbaa_p (tbaa_p_)
-    {}
+      tbaa_p (tbaa_p_), known_ranges (NULL)
+   {}
+  ~vn_walk_cb_data ();
+  void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT);
 
   vn_reference_t vr;
   tree *last_vuse_ptr;
   vn_lookup_kind vn_walk_kind;
   bool tbaa_p;
+
+  /* The VDEFs of partial defs we come along.  */
+  auto_vec<pd_data, 2> partial_defs;
+  /* The first defs range to avoid splay tree setup in most cases.  */
+  pd_range first_range;
+  tree first_vuse;
+  splay_tree known_ranges;
+  obstack ranges_obstack;
 };
 
+vn_walk_cb_data::~vn_walk_cb_data ()
+{
+  if (known_ranges)
+    {
+      splay_tree_delete (known_ranges);
+      obstack_free (&ranges_obstack, NULL);
+    }
+}
+
+/* pd_range splay-tree helpers.  */
+
+static int
+pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p)
+{
+  HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *)offset1p;
+  HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *)offset2p;
+  if (offset1 < offset2)
+    return -1;
+  else if (offset1 > offset2)
+    return 1;
+  return 0;
+}
+
+static void *
+pd_tree_alloc (int size, void *data_)
+{
+  vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
+  return obstack_alloc (&data->ranges_obstack, size);
+}
+
+static void
+pd_tree_dealloc (void *, void *)
+{
+}
+
+/* Push PD to the vector of partial definitions.  Returns a
+   value when we are ready to combine things with VUSE and MAXSIZEI,
+   NULL when we want to continue looking for partial defs or
+   (void *)-1 on failure.  */
+
+void *
+vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse,
+				   HOST_WIDE_INT maxsizei)
+{
+  if (partial_defs.is_empty ())
+    {
+      partial_defs.safe_push (pd);
+      first_range.offset = pd.offset;
+      first_range.size = pd.size;
+      first_vuse = vuse;
+      last_vuse_ptr = NULL;
+    }
+  else
+    {
+      if (!known_ranges)
+	{
+	  /* ???  Optimize the case where the second partial def
+	     completes things.  */
+	  gcc_obstack_init (&ranges_obstack);
+	  known_ranges
+	      = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
+					       pd_tree_alloc,
+					       pd_tree_dealloc, this);
+	  splay_tree_insert (known_ranges,
+			     (splay_tree_key)&first_range.offset,
+			     (splay_tree_value)&first_range);
+	}
+      if (known_ranges)
+	{
+	  pd_range newr = { pd.offset, pd.size };
+	  splay_tree_node n;
+	  pd_range *r;
+	  /* Lookup the predecessor of offset + 1 and see if
+	     we need to merge with it.  */
+	  HOST_WIDE_INT loffset = newr.offset + 1;
+	  if ((n = splay_tree_predecessor (known_ranges,
+					   (splay_tree_key)&loffset))
+	      && ((r = (pd_range *)n->value), true)
+	      && ranges_known_overlap_p (r->offset, r->size + 1,
+					 newr.offset, newr.size))
+	    {
+	      /* Ignore partial defs already covered.  */
+	      if (known_subrange_p (newr.offset, newr.size,
+				    r->offset, r->size))
+		return NULL;
+	      r->size = MAX (r->offset + r->size,
+			     newr.offset + newr.size) - r->offset;
+	    }
+	  else
+	    {
+	      /* newr.offset wasn't covered yet, insert the
+		 range.  */
+	      r = XOBNEW (&ranges_obstack, pd_range);
+	      *r = newr;
+	      splay_tree_insert (known_ranges,
+				 (splay_tree_key)&r->offset,
+				 (splay_tree_value)r);
+	    }
+	  /* Merge r which now contains newr and is a member
+	     of the splay tree with adjacent overlapping ranges.  */
+	  pd_range *rafter;
+	  while ((n = splay_tree_successor (known_ranges,
+					    (splay_tree_key)&r->offset))
+		 && ((rafter = (pd_range *)n->value), true)
+		 && ranges_known_overlap_p (r->offset, r->size + 1,
+					    rafter->offset, rafter->size))
+	    {
+	      r->size = MAX (r->offset + r->size,
+			     rafter->offset + rafter->size) - r->offset;
+	      splay_tree_remove (known_ranges,
+				 (splay_tree_key)&rafter->offset);
+	    }
+	  partial_defs.safe_push (pd);
+
+	  /* Now we have merged newr into the range tree.
+	     When we have covered the whole access, that is
+	     [0, maxsizei / BITS_PER_UNIT), the tree contains
+	     exactly one node with the desired properties: 'r'.  */
+	  if (known_subrange_p (0, maxsizei / BITS_PER_UNIT,
+				r->offset, r->size))
+	    {
+	      /* Now simply native encode all partial defs
+		 in reverse order.  */
+	      unsigned ndefs = partial_defs.length ();
+	      /* We support up to 512-bit values (for V8DFmode).  */
+	      unsigned char buffer[64];
+	      int len;
+
+	      while (!partial_defs.is_empty ())
+		{
+		  pd_data pd = partial_defs.pop ();
+		  if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
+		    /* Empty CONSTRUCTOR.  */
+		    memset (buffer + MAX (0, pd.offset),
+			    0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size));
+		  else
+		    {
+		      len = native_encode_expr (pd.rhs,
+						buffer + MAX (0, pd.offset),
+						sizeof (buffer) - MAX (0, pd.offset),
+						MAX (0, -pd.offset));
+		      if (len <= 0
+			  || len < (pd.size - MAX (0, -pd.offset)))
+			{
+			  if (dump_file && (dump_flags & TDF_DETAILS))
+			    fprintf (dump_file, "Failed to encode %u "
+				     "partial definitions\n", ndefs);
+			  return (void *)-1;
+			}
+		    }
+		}
+
+	      tree type = vr->type;
+	      /* Make sure to interpret in a type that has a range
+		 covering the whole access size.  */
+	      if (INTEGRAL_TYPE_P (vr->type)
+		  && maxsizei != TYPE_PRECISION (vr->type))
+		type = build_nonstandard_integer_type (maxsizei,
+						       TYPE_UNSIGNED (type));
+	      tree val = native_interpret_expr (type, buffer,
+						maxsizei / BITS_PER_UNIT);
+	      /* If we chop off bits because the type's precision doesn't
+		 match the memory access size, this is OK when optimizing
+		 reads but not when called from the DSE code during
+		 elimination.  */
+	      if (val
+		  && type != vr->type)
+		{
+		  if (! int_fits_type_p (val, vr->type))
+		    val = NULL_TREE;
+		  else
+		    val = fold_convert (vr->type, val);
+		}
+
+	      if (val)
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file, "Successfully combined %u "
+			     "partial definitions\n", ndefs);
+		  return vn_reference_lookup_or_insert_for_pieces
+		      (first_vuse,
+		       vr->set, vr->type, vr->operands, val);
+		}
+	      else
+		{
+		  if (dump_file && (dump_flags & TDF_DETAILS))
+		    fprintf (dump_file, "Failed to interpret %u "
+			     "encoded partial definitions\n", ndefs);
+		  return (void *)-1;
+		}
+	    }
+	}
+    }
+  /* Continue looking for partial defs.  */
+  return NULL;
+}
+
 /* Callback for walk_non_aliased_vuses.  Adjusts the vn_reference_t VR_
    with the current VUSE and performs the expression lookup.  */
 
@@ -1671,6 +1899,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB
   vn_reference_s **slot;
   hashval_t hash;
 
+  /* If we have partial definitions recorded we have to go through
+     vn_reference_lookup_3.  */
+  if (!data->partial_defs.is_empty ())
+    return NULL;
+
   if (data->last_vuse_ptr)
     *data->last_vuse_ptr = vuse;
 
@@ -2179,8 +2412,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree
       else
 	return (void *)-1;
       tree len = gimple_call_arg (def_stmt, 2);
-      if (known_subrange_p (offset, maxsize, offset2,
-			    wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
+      HOST_WIDE_INT leni, offset2i, offseti;
+      if (data->partial_defs.is_empty ()
+	  && known_subrange_p (offset, maxsize, offset2,
+			       wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
 	{
 	  tree val;
 	  if (integer_zerop (gimple_call_arg (def_stmt, 1)))
@@ -2209,6 +2444,19 @@ vn_reference_lookup_3 (ao_ref *ref, tree
 	  return vn_reference_lookup_or_insert_for_pieces
 	           (vuse, vr->set, vr->type, vr->operands, val);
 	}
+      /* For now handle clearing memory with partial defs.  */
+      else if (integer_zerop (gimple_call_arg (def_stmt, 1))
+	       && tree_to_poly_int64 (len).is_constant (&leni)
+	       && offset.is_constant (&offseti)
+	       && offset2.is_constant (&offset2i)
+	       && maxsize.is_constant (&maxsizei))
+	{
+	  pd_data pd;
+	  pd.rhs = build_constructor (NULL_TREE, NULL);
+	  pd.offset = offset2i - offseti;
+	  pd.size = leni;
+	  return data->push_partial_def (pd, vuse, maxsizei);
+	}
     }
 
   /* 2) Assignment from an empty CONSTRUCTOR.  */
@@ -2219,18 +2467,37 @@ vn_reference_lookup_3 (ao_ref *ref, tree
     {
       tree base2;
       poly_int64 offset2, size2, maxsize2;
+      HOST_WIDE_INT offset2i, size2i;
       bool reverse;
       base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
 				       &offset2, &size2, &maxsize2, &reverse);
       if (known_size_p (maxsize2)
 	  && known_eq (maxsize2, size2)
 	  && adjust_offsets_for_equal_base_address (base, &offset,
-						    base2, &offset2)
-	  && known_subrange_p (offset, maxsize, offset2, size2))
+						    base2, &offset2))
 	{
-	  tree val = build_zero_cst (vr->type);
-	  return vn_reference_lookup_or_insert_for_pieces
-	           (vuse, vr->set, vr->type, vr->operands, val);
+	  if (data->partial_defs.is_empty ()
+	      && known_subrange_p (offset, maxsize, offset2, size2))
+	    {
+	      tree val = build_zero_cst (vr->type);
+	      return vn_reference_lookup_or_insert_for_pieces
+		  (vuse, vr->set, vr->type, vr->operands, val);
+	    }
+	  else if (maxsize.is_constant (&maxsizei)
+		   && maxsizei % BITS_PER_UNIT == 0
+		   && offset.is_constant (&offseti)
+		   && offseti % BITS_PER_UNIT == 0
+		   && offset2.is_constant (&offset2i)
+		   && offset2i % BITS_PER_UNIT == 0
+		   && size2.is_constant (&size2i)
+		   && size2i % BITS_PER_UNIT == 0)
+	    {
+	      pd_data pd;
+	      pd.rhs = gimple_assign_rhs1 (def_stmt);
+	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+	      pd.size = size2i / BITS_PER_UNIT;
+	      return data->push_partial_def (pd, vuse, maxsizei);
+	    }
 	}
     }
 
@@ -2253,7 +2520,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
     {
       tree base2;
       poly_int64 offset2, size2, maxsize2;
-      HOST_WIDE_INT offset2i;
+      HOST_WIDE_INT offset2i, size2i;
       bool reverse;
       base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
 				       &offset2, &size2, &maxsize2, &reverse);
@@ -2266,45 +2533,60 @@ vn_reference_lookup_3 (ao_ref *ref, tree
 						    base2, &offset2)
 	  && offset.is_constant (&offseti)
 	  && offset2.is_constant (&offset2i)
-	  && known_subrange_p (offseti, maxsizei, offset2, size2))
+	  && size2.is_constant (&size2i))
 	{
-	  /* We support up to 512-bit values (for V8DFmode).  */
-	  unsigned char buffer[64];
-	  int len;
-
-	  tree rhs = gimple_assign_rhs1 (def_stmt);
-	  if (TREE_CODE (rhs) == SSA_NAME)
-	    rhs = SSA_VAL (rhs);
-	  len = native_encode_expr (rhs,
-				    buffer, sizeof (buffer),
-				    (offseti - offset2i) / BITS_PER_UNIT);
-	  if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
+	  if (data->partial_defs.is_empty ()
+	      && known_subrange_p (offseti, maxsizei, offset2, size2))
 	    {
-	      tree type = vr->type;
-	      /* Make sure to interpret in a type that has a range
-		 covering the whole access size.  */
-	      if (INTEGRAL_TYPE_P (vr->type)
-		  && maxsizei != TYPE_PRECISION (vr->type))
-		type = build_nonstandard_integer_type (maxsizei,
-						       TYPE_UNSIGNED (type));
-	      tree val = native_interpret_expr (type, buffer,
-						maxsizei / BITS_PER_UNIT);
-	      /* If we chop off bits because the types precision doesn't
-		 match the memory access size this is ok when optimizing
-		 reads but not when called from the DSE code during
-		 elimination.  */
-	      if (val
-		  && type != vr->type)
+	      /* We support up to 512-bit values (for V8DFmode).  */
+	      unsigned char buffer[64];
+	      int len;
+
+	      tree rhs = gimple_assign_rhs1 (def_stmt);
+	      if (TREE_CODE (rhs) == SSA_NAME)
+		rhs = SSA_VAL (rhs);
+	      len = native_encode_expr (rhs,
+					buffer, sizeof (buffer),
+					(offseti - offset2i) / BITS_PER_UNIT);
+	      if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
 		{
-		  if (! int_fits_type_p (val, vr->type))
-		    val = NULL_TREE;
-		  else
-		    val = fold_convert (vr->type, val);
-		}
+		  tree type = vr->type;
+		  /* Make sure to interpret in a type that has a range
+		     covering the whole access size.  */
+		  if (INTEGRAL_TYPE_P (vr->type)
+		      && maxsizei != TYPE_PRECISION (vr->type))
+		    type = build_nonstandard_integer_type (maxsizei,
+							   TYPE_UNSIGNED (type));
+		  tree val = native_interpret_expr (type, buffer,
+						    maxsizei / BITS_PER_UNIT);
+		  /* If we chop off bits because the type's precision doesn't
+		     match the memory access size, this is OK when optimizing
+		     reads but not when called from the DSE code during
+		     elimination.  */
+		  if (val
+		      && type != vr->type)
+		    {
+		      if (! int_fits_type_p (val, vr->type))
+			val = NULL_TREE;
+		      else
+			val = fold_convert (vr->type, val);
+		    }
 
-	      if (val)
-		return vn_reference_lookup_or_insert_for_pieces
-			 (vuse, vr->set, vr->type, vr->operands, val);
+		  if (val)
+		    return vn_reference_lookup_or_insert_for_pieces
+			(vuse, vr->set, vr->type, vr->operands, val);
+		}
+	    }
+	  else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
+	    {
+	      pd_data pd;
+	      tree rhs = gimple_assign_rhs1 (def_stmt);
+	      if (TREE_CODE (rhs) == SSA_NAME)
+		rhs = SSA_VAL (rhs);
+	      pd.rhs = rhs;
+	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+	      pd.size = size2i / BITS_PER_UNIT;
+	      return data->push_partial_def (pd, vuse, maxsizei);
 	    }
 	}
     }
@@ -2315,7 +2597,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
 	   && is_gimple_reg_type (vr->type)
 	   && !contains_storage_order_barrier_p (vr->operands)
 	   && gimple_assign_single_p (def_stmt)
-	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
+	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
+	   /* A subset of partial defs from non-constants can be handled
+	      by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or
+	      even a (series of) BIT_INSERT_EXPR hoping for simplifications
+	      downstream, not so much for actually doing the insertion.  */
+	   && data->partial_defs.is_empty ())
     {
       tree base2;
       poly_int64 offset2, size2, maxsize2;
@@ -2363,7 +2650,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree
 	   && gimple_assign_single_p (def_stmt)
 	   && (DECL_P (gimple_assign_rhs1 (def_stmt))
 	       || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF
-	       || handled_component_p (gimple_assign_rhs1 (def_stmt))))
+	       || handled_component_p (gimple_assign_rhs1 (def_stmt)))
+	   /* Handling this is more complicated, give up for now.  */
+	   && data->partial_defs.is_empty ())
     {
       tree base2;
       int i, j, k;
@@ -2497,7 +2786,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree
 	       || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)
 	   && (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR
 	       || TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME)
-	   && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size))
+	   && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size)
+	   /* Handling this is more complicated, give up for now.  */
+	   && data->partial_defs.is_empty ())
     {
       tree lhs, rhs;
       ao_ref r;
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c	(working copy)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2)));
+int foo (int *a)
+{
+  a[0] = 1;
+  a[1] = 2;
+  v2si x = *(v2si *)a;
+  *(v2si *)&a[2] = x;
+  return a[3];
+}
+
+/* { dg-final { scan-tree-dump "return 2;" "fre1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c	(working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));
+int foo (int *a)
+{
+  a[2] = 2;
+  a[0] = 0;
+  a[1] = 1;
+  a[3] = 4;
+  v4si x = *(v4si *)a;
+  *(v4si *)&a[4] = x;
+  return a[4] + a[7];
+}
+
+/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-75.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-75.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-75.c	(working copy)
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O -fgimple -fdump-tree-fre1" } */
+
+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));
+#if __SIZEOF_INT__ == 4
+__GIMPLE (ssa) int foo (int *a)
+{
+  v4si _2;
+  int _3;
+  int _4;
+  int _5;
+  int _6;
+  int _7;
+  int _8;
+  int _9;
+
+__BB(2):
+  __MEM <unsigned char[3 * __SIZEOF_INT__]> ((char *)a_1(D) + 4) = _Literal (unsigned char[3 * __SIZEOF_INT__]) {};
+  __MEM <int> (a_1(D) + 8) = 2;
+  __MEM <int> (a_1(D)) = 1;
+  _2 = __MEM <v4si> (a_1(D));
+  _3 = __BIT_FIELD_REF <int> (_2, 32, 0);
+  _4 = __BIT_FIELD_REF <int> (_2, 32, 32);
+  _5 = __BIT_FIELD_REF <int> (_2, 32, 64);
+  _6 = __BIT_FIELD_REF <int> (_2, 32, 96);
+  _7 = _3 + _4;
+  _8 = _7 + _5;
+  _9 = _8 + _6;
+  return _9;
+}
+#endif
+
+/* { dg-final { scan-tree-dump "return 3;" "fre1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c	(working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-fre1" } */
+
+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));
+int foo (int *a)
+{
+  __builtin_memset (a, 0, 2 * __SIZEOF_INT__);
+  a[2] = 2;
+  a[0] = 1;
+  a[3] = 3;
+  v4si x = *(v4si *)a;
+  *(v4si *)&a[4] = x;
+  return a[4] + a[5] + a[7];
+}
+
+/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */
Index: gcc/testsuite/g++.dg/tree-ssa/pr83518.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/pr83518.C	(nonexistent)
+++ gcc/testsuite/g++.dg/tree-ssa/pr83518.C	(working copy)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+unsigned test()
+{
+  int arr[] = {5,4,3,2,1};
+  int sum = 0;
+
+  for(int i = 0;i < 5;++i)
+    {
+      for(int j = 0; j < 5; ++j)
+	{
+	  int t = arr[i];
+	  arr[i] = arr[j];
+	  arr[j] = t;
+	}
+    }
+
+  for(int i = 0; i < 5; ++i)
+    {
+      sum += arr[i];
+    }
+
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump "return 15;" "optimized" } } */

