[PATCH, SRA] Total scalarization and padding

Martin Jambor mjambor@suse.cz
Tue Jun 28 13:47:00 GMT 2011


Hi,

at the moment SRA can get confused by alignment padding and think that
it actually contains some data for which there is no planned
replacement and thus might leave some loads and stores in place
instead of removing them.  This is perhaps the biggest problem when we
attempt total scalarization of simple structures exactly in order to
get rid of these and of the variables altogether.

I've pondered for quite a while how to best deal with them.  One
option was to make just the total scalarization stronger.  I have also
contemplated creating phantom accesses for padding I could detect
(i.e. in simple structures) which would be more general, but this
would complicate the parts of SRA which are already quite convoluted
and I was not really sure it was worth it.

Eventually I decided for the total scalarization option.  This patch
changes it such that the flag is propagated down the access tree but
also, if it does not work out, is reset on the way up.  If the flag
survives, the access tree is considered "covered" by scalar
replacements and thus it is known not to contain unscalarized data.

While changing function analyze_access_subtree I have simplified the
way we compute the hole flag and also fixed one comparison which we
currently have the wrong way round but it fortunately does not matter
because if there is a hole, the covered_to will never add up to the
total size.  I'll probably post a separate patch against 4.6 just in
case someone attempts to read the source.

Bootstrapped and tested on x86_64-linux, OK for trunk?

Thanks,

Martin


2011-06-24  Martin Jambor  <mjambor@suse.cz>

        * tree-sra.c (struct access): Rename total_scalarization to
        grp_total_scalarization
        (completely_scalarize_var): New function.
        (sort_and_splice_var_accesses): Set total_scalarization in the
        representative access.
        (analyze_access_subtree): Propagate total scalarization accross the
        tree, no holes in totally scalarized trees, simplify coverage
        computation.
        (analyze_all_variable_accesses): Call completely_scalarize_var instead
        of completely_scalarize_record.

        * testsuite/gcc.dg/tree-ssa/sra-12.c: New test.

Index: src/gcc/tree-sra.c
===================================================================
*** src.orig/gcc/tree-sra.c
--- src/gcc/tree-sra.c
*************** struct access
*** 170,179 ****
    /* Is this particular access write access? */
    unsigned write : 1;
  
-   /* Is this access an artificial one created to scalarize some record
-      entirely? */
-   unsigned total_scalarization : 1;
- 
    /* Is this access an access to a non-addressable field? */
    unsigned non_addressable : 1;
  
--- 170,175 ----
*************** struct access
*** 204,209 ****
--- 200,209 ----
       is not propagated in the access tree in any direction.  */
    unsigned grp_scalar_write : 1;
  
+   /* Is this access an artificial one created to scalarize some record
+      entirely? */
+   unsigned grp_total_scalarization : 1;
+ 
    /* Other passes of the analysis use this bit to make function
       analyze_access_subtree create scalar replacements for this group if
       possible.  */
*************** dump_access (FILE *f, struct access *acc
*** 377,402 ****
    fprintf (f, ", type = ");
    print_generic_expr (f, access->type, 0);
    if (grp)
!     fprintf (f, ", total_scalarization = %d, grp_read = %d, grp_write = %d, "
! 	     "grp_assignment_read = %d, grp_assignment_write = %d, "
! 	     "grp_scalar_read = %d, grp_scalar_write = %d, "
  	     "grp_hint = %d, grp_covered = %d, "
  	     "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, "
  	     "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
  	     "grp_maybe_modified = %d, "
  	     "grp_not_necessarilly_dereferenced = %d\n",
! 	     access->total_scalarization, access->grp_read, access->grp_write,
! 	     access->grp_assignment_read, access->grp_assignment_write,
! 	     access->grp_scalar_read, access->grp_scalar_write,
  	     access->grp_hint, access->grp_covered,
  	     access->grp_unscalarizable_region, access->grp_unscalarized_data,
  	     access->grp_partial_lhs, access->grp_to_be_replaced,
  	     access->grp_maybe_modified,
  	     access->grp_not_necessarilly_dereferenced);
    else
!     fprintf (f, ", write = %d, total_scalarization = %d, "
  	     "grp_partial_lhs = %d\n",
! 	     access->write, access->total_scalarization,
  	     access->grp_partial_lhs);
  }
  
--- 377,402 ----
    fprintf (f, ", type = ");
    print_generic_expr (f, access->type, 0);
    if (grp)
!     fprintf (f, ", grp_read = %d, grp_write = %d, grp_assignment_read = %d, "
! 	     "grp_assignment_write = %d, grp_scalar_read = %d, "
! 	     "grp_scalar_write = %d, grp_total_scalarization = %d, "
  	     "grp_hint = %d, grp_covered = %d, "
  	     "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, "
  	     "grp_partial_lhs = %d, grp_to_be_replaced = %d, "
  	     "grp_maybe_modified = %d, "
  	     "grp_not_necessarilly_dereferenced = %d\n",
! 	     access->grp_read, access->grp_write, access->grp_assignment_read,
! 	     access->grp_assignment_write, access->grp_scalar_read,
! 	     access->grp_scalar_write, access->grp_total_scalarization,
  	     access->grp_hint, access->grp_covered,
  	     access->grp_unscalarizable_region, access->grp_unscalarized_data,
  	     access->grp_partial_lhs, access->grp_to_be_replaced,
  	     access->grp_maybe_modified,
  	     access->grp_not_necessarilly_dereferenced);
    else
!     fprintf (f, ", write = %d, grp_total_scalarization = %d, "
  	     "grp_partial_lhs = %d\n",
! 	     access->write, access->grp_total_scalarization,
  	     access->grp_partial_lhs);
  }
  
*************** completely_scalarize_record (tree base,
*** 924,930 ****
  	    access = create_access_1 (base, pos, size);
  	    access->expr = nref;
  	    access->type = ft;
! 	    access->total_scalarization = 1;
  	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
  	  }
  	else
--- 924,930 ----
  	    access = create_access_1 (base, pos, size);
  	    access->expr = nref;
  	    access->type = ft;
! 	    access->grp_total_scalarization = 1;
  	    /* Accesses for intraprocedural SRA can have their stmt NULL.  */
  	  }
  	else
*************** completely_scalarize_record (tree base,
*** 932,937 ****
--- 932,954 ----
        }
  }
  
+ /* Create total_scalarization accesses for all scalar type fields in VAR and
+    for VAR a a whole.  VAR must be of a RECORD_TYPE conforming to
+    type_consists_of_records_p.   */
+ 
+ static void
+ completely_scalarize_var (tree var)
+ {
+   HOST_WIDE_INT size = tree_low_cst (DECL_SIZE (var), 1);
+   struct access *access;
+ 
+   access = create_access_1 (var, 0, size);
+   access->expr = var;
+   access->type = TREE_TYPE (var);
+   access->grp_total_scalarization = 1;
+ 
+   completely_scalarize_record (var, var, 0, var);
+ }
  
  /* Search the given tree for a declaration by skipping handled components and
     exclude it from the candidates.  */
*************** sort_and_splice_var_accesses (tree var)
*** 1713,1719 ****
        bool grp_assignment_read = access->grp_assignment_read;
        bool grp_assignment_write = access->grp_assignment_write;
        bool multiple_scalar_reads = false;
!       bool total_scalarization = access->total_scalarization;
        bool grp_partial_lhs = access->grp_partial_lhs;
        bool first_scalar = is_gimple_reg_type (access->type);
        bool unscalarizable_region = access->grp_unscalarizable_region;
--- 1730,1736 ----
        bool grp_assignment_read = access->grp_assignment_read;
        bool grp_assignment_write = access->grp_assignment_write;
        bool multiple_scalar_reads = false;
!       bool total_scalarization = access->grp_total_scalarization;
        bool grp_partial_lhs = access->grp_partial_lhs;
        bool first_scalar = is_gimple_reg_type (access->type);
        bool unscalarizable_region = access->grp_unscalarizable_region;
*************** sort_and_splice_var_accesses (tree var)
*** 1757,1763 ****
  	  grp_assignment_write |= ac2->grp_assignment_write;
  	  grp_partial_lhs |= ac2->grp_partial_lhs;
  	  unscalarizable_region |= ac2->grp_unscalarizable_region;
! 	  total_scalarization |= ac2->total_scalarization;
  	  relink_to_new_repr (access, ac2);
  
  	  /* If there are both aggregate-type and scalar-type accesses with
--- 1774,1780 ----
  	  grp_assignment_write |= ac2->grp_assignment_write;
  	  grp_partial_lhs |= ac2->grp_partial_lhs;
  	  unscalarizable_region |= ac2->grp_unscalarizable_region;
! 	  total_scalarization |= ac2->grp_total_scalarization;
  	  relink_to_new_repr (access, ac2);
  
  	  /* If there are both aggregate-type and scalar-type accesses with
*************** sort_and_splice_var_accesses (tree var)
*** 1778,1783 ****
--- 1795,1801 ----
        access->grp_assignment_read = grp_assignment_read;
        access->grp_assignment_write = grp_assignment_write;
        access->grp_hint = multiple_scalar_reads || total_scalarization;
+       access->grp_total_scalarization = total_scalarization;
        access->grp_partial_lhs = grp_partial_lhs;
        access->grp_unscalarizable_region = unscalarizable_region;
        if (access->first_link)
*************** analyze_access_subtree (struct access *r
*** 2023,2028 ****
--- 2041,2048 ----
  	root->grp_write = 1;
        if (parent->grp_assignment_write)
  	root->grp_assignment_write = 1;
+       if (parent->grp_total_scalarization)
+ 	root->grp_total_scalarization = 1;
      }
  
    if (root->grp_unscalarizable_region)
*************** analyze_access_subtree (struct access *r
*** 2033,2048 ****
  
    for (child = root->first_child; child; child = child->next_sibling)
      {
!       if (!hole && child->offset < covered_to)
! 	hole = true;
!       else
! 	covered_to += child->size;
! 
        sth_created |= analyze_access_subtree (child, root,
  					     allow_replacements && !scalar);
  
        root->grp_unscalarized_data |= child->grp_unscalarized_data;
!       hole |= !child->grp_covered;
      }
  
    if (allow_replacements && scalar && !root->first_child
--- 2053,2068 ----
  
    for (child = root->first_child; child; child = child->next_sibling)
      {
!       hole |= covered_to < child->offset;
        sth_created |= analyze_access_subtree (child, root,
  					     allow_replacements && !scalar);
  
        root->grp_unscalarized_data |= child->grp_unscalarized_data;
!       root->grp_total_scalarization &= child->grp_total_scalarization;
!       if (child->grp_covered)
! 	covered_to += child->size;
!       else
! 	hole = true;
      }
  
    if (allow_replacements && scalar && !root->first_child
*************** analyze_access_subtree (struct access *r
*** 2063,2072 ****
        sth_created = true;
        hole = false;
      }
!   else if (covered_to < limit)
!     hole = true;
  
!   if (sth_created && !hole)
      {
        root->grp_covered = 1;
        return true;
--- 2083,2098 ----
        sth_created = true;
        hole = false;
      }
!   else
!     {
!       if (covered_to < limit)
! 	hole = true;
!       if (scalar)
! 	root->grp_total_scalarization = 0;
!     }
  
!   if (sth_created
!       && (!hole || root->grp_total_scalarization))
      {
        root->grp_covered = 1;
        return true;
*************** analyze_all_variable_accesses (void)
*** 2288,2294 ****
  		<= max_total_scalarization_size)
  	    && type_consists_of_records_p (TREE_TYPE (var)))
  	  {
! 	    completely_scalarize_record (var, var, 0, var);
  	    if (dump_file && (dump_flags & TDF_DETAILS))
  	      {
  		fprintf (dump_file, "Will attempt to totally scalarize ");
--- 2314,2320 ----
  		<= max_total_scalarization_size)
  	    && type_consists_of_records_p (TREE_TYPE (var)))
  	  {
! 	    completely_scalarize_var (var);
  	    if (dump_file && (dump_flags & TDF_DETAILS))
  	      {
  		fprintf (dump_file, "Will attempt to totally scalarize ");
Index: src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
===================================================================
*** /dev/null
--- src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c
***************
*** 0 ****
--- 1,25 ----
+ /* Verify that SRA total scalarization will not be confused by padding.  */
+ 
+ /* { dg-do compile } */
+ /* { dg-options "-O1 -fdump-tree-release_ssa" } */
+ 
+ struct S
+ {
+   int i;
+   unsigned short f1;
+   char f2;
+   unsigned short f3, f4;
+ };
+ 
+ 
+ int foo (struct S *p)
+ {
+   struct S l;
+ 
+   l = *p;
+   l.i++;
+   *p = l;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa"} } */
+ /* { dg-final { cleanup-tree-dump "release_ssa" } } */



More information about the Gcc-patches mailing list