This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Re: [PATCH] Replace has_single_use guards in store-merging


On Mon, 6 Nov 2017, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned earlier, the !has_single_use checks disable store merging
> in many cases; it is enough to have a single multiple-use somewhere and
> all of a sudden we break the group.
> 
> The following patch replaces them with a heuristic; it is based on
> GIMPLE statement counts, but I think it should work pretty well in
> general.
> The code counts how many statements there are for the group without
> store merging (i.e. the stores and, if not storing a constant, the
> associated loads as well as any bitwise stmts), and then counts how
> many are needed if store merging is performed and the expected DCE
> gets rid of dead stmts - i.e. we count what we'll emit in the store
> merging sequences for the group (without the masking for bitfields
> with padding bits in between, as even normal stores to bitfields have
> that implicitly and only expansion makes it explicit in the IL), plus
> whatever original statements had multiple uses (and the stmts they
> use, if any).
> So, e.g. if we have _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; use (_1); we
> can still store merge it, as there are 4 original stmts and we'd replace it
> with 3 new stmts: _1 = t.a; _3 = [t.a;t.b]; [s.a;s.b] = _3; use (_1);
> while if we have _1 = t.a; s.a = _1; _2 = t.b; s.b = _2; use (_1, _2);
> we don't replace it anymore, as that would be trading 4 original for 4 new
> ones.
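> 
> For concreteness, a C-level sketch (illustrative only, not from the
> patch or its testcases; the names match the example above) that could
> yield GIMPLE like the first, still profitable, case:
> 
>   struct S { char a, b; } s, t;
>   void use (char);
> 
>   void
>   foo (void)
>   {
>     s.a = t.a;	/* _1 = t.a; s.a = _1; */
>     s.b = t.b;	/* _2 = t.b; s.b = _2; */
>     use (t.a);	/* second use of _1 after FRE/CSE */
>   }
> 
> Merging the two byte loads and the two byte stores is still a win
> there even though _1 has a second use.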
> 
> During the combined {x86_64,i686}-linux bootstraps/regtests, without
> this and the patch posted today the counts were:
> integer_cst  215621   442752
> mem_ref  12115   24919
> bit_and_expr  37   88
> bit_ior_expr  19   46
> bit_xor_expr  27   58
> and with the two patches:
> integer_cst  215605   442817
> mem_ref  17320   36133
> bit_and_expr  93   224
> bit_ior_expr  66   153
> bit_xor_expr  56   153
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2017-11-06  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* gimple-ssa-store-merging.c (count_multiple_uses): New function.
> 	(split_group): Add total_orig and total_new arguments, estimate the
> 	number of statements related to the store group without store merging
> 	and with store merging.
> 	(imm_store_chain_info::output_merged_store): Adjust split_group
> 	callers, punt if estimated number of statements with store merging
> 	is not smaller than estimated number of statements without it.
> 	Formatting fix.
> 	(handled_load): Remove has_single_use checks.
> 	(pass_store_merging::process_store): Likewise.
> 
> --- gcc/gimple-ssa-store-merging.c.jj	2017-11-06 08:57:07.000000000 +0100
> +++ gcc/gimple-ssa-store-merging.c	2017-11-06 16:35:38.122437951 +0100
> @@ -1370,6 +1370,65 @@ find_constituent_stores (struct merged_s
>    return ret;
>  }
>  
> +/* Return how many SSA_NAMEs used to compute value to store in the INFO
> +   store have multiple uses.  If any SSA_NAME has multiple uses, also
> +   count statements needed to compute it.  */
> +
> +static unsigned
> +count_multiple_uses (store_immediate_info *info)
> +{
> +  gimple *stmt = info->stmt;
> +  unsigned ret = 0;
> +  switch (info->rhs_code)
> +    {
> +    case INTEGER_CST:
> +      return 0;
> +    case BIT_AND_EXPR:
> +    case BIT_IOR_EXPR:
> +    case BIT_XOR_EXPR:
> +      if (!has_single_use (gimple_assign_rhs1 (stmt)))
> +	{
> +	  ret += 1 + info->ops[0].bit_not_p;
> +	  if (info->ops[1].base_addr)
> +	    ret += 1 + info->ops[1].bit_not_p;
> +	  return ret + 1;
> +	}
> +      stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
> +      /* stmt is now the BIT_*_EXPR.  */
> +      if (!has_single_use (gimple_assign_rhs1 (stmt)))
> +	ret += 1 + info->ops[0].bit_not_p;
> +      else if (info->ops[0].bit_not_p)
> +	{
> +	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
> +	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
> +	    ++ret;
> +	}
> +      if (info->ops[1].base_addr == NULL_TREE)
> +	return ret;
> +      if (!has_single_use (gimple_assign_rhs2 (stmt)))
> +	ret += 1 + info->ops[1].bit_not_p;
> +      else if (info->ops[1].bit_not_p)
> +	{
> +	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
> +	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
> +	    ++ret;
> +	}
> +      return ret;
> +    case MEM_REF:
> +      if (!has_single_use (gimple_assign_rhs1 (stmt)))
> +	return 1 + info->ops[0].bit_not_p;
> +      else if (info->ops[0].bit_not_p)
> +	{
> +	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
> +	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
> +	    return 1;
> +	}
> +      return 0;
> +    default:
> +      gcc_unreachable ();
> +    }

Can't you simply use

   tree op;
   ssa_op_iter iter;
   unsigned ret = 0;
   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
     if (!has_single_use (op))
       ++ret;
   return ret;

?  Not sure if the bit_not_p handling is required.

It doesn't seem you handle multi-uses of the BIT_*_EXPR result
itself?  Or does it only handle multi-uses of the BIT_*_EXPR
but not the feeding loads?
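
I.e. in a (made-up) case like

   _1 = t.a;
   _2 = u.a;
   _3 = _1 & _2;
   s.a = _3;
   use (_3);

is the extra use of _3 (the BIT_AND_EXPR result itself) what gets
counted, or only extra uses of the feeding loads _1 and _2?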

That is, it looks like an approximation already - can we simplify
it a bit?

Thanks,
Richard.

> +}
> +
>  /* Split a merged store described by GROUP by populating the SPLIT_STORES
>     vector (if non-NULL) with split_store structs describing the byte offset
>     (from the base), the bit size and alignment of each store as well as the
> @@ -1385,7 +1444,9 @@ find_constituent_stores (struct merged_s
>  static unsigned int
>  split_group (merged_store_group *group, bool allow_unaligned_store,
>  	     bool allow_unaligned_load,
> -	     vec<struct split_store *> *split_stores)
> +	     vec<struct split_store *> *split_stores,
> +	     unsigned *total_orig,
> +	     unsigned *total_new)
>  {
>    unsigned HOST_WIDE_INT pos = group->bitregion_start;
>    unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
> @@ -1393,6 +1454,7 @@ split_group (merged_store_group *group,
>    unsigned HOST_WIDE_INT group_align = group->align;
>    unsigned HOST_WIDE_INT align_base = group->align_base;
>    unsigned HOST_WIDE_INT group_load_align = group_align;
> +  bool any_orig = false;
>  
>    gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
>  
> @@ -1400,6 +1462,34 @@ split_group (merged_store_group *group,
>    unsigned HOST_WIDE_INT try_pos = bytepos;
>    group->stores.qsort (sort_by_bitpos);
>  
> +  if (total_orig)
> +    {
> +      unsigned int i;
> +      store_immediate_info *info = group->stores[0];
> +
> +      total_new[0] = 0;
> +      total_orig[0] = 1; /* The orig store.  */
> +      info = group->stores[0];
> +      if (info->ops[0].base_addr)
> +	total_orig[0] += 1 + info->ops[0].bit_not_p;
> +      if (info->ops[1].base_addr)
> +	total_orig[0] += 1 + info->ops[1].bit_not_p;
> +      switch (info->rhs_code)
> +	{
> +	case BIT_AND_EXPR:
> +	case BIT_IOR_EXPR:
> +	case BIT_XOR_EXPR:
> +	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
> +	  break;
> +	default:
> +	  break;
> +	}
> +      total_orig[0] *= group->stores.length ();
> +
> +      FOR_EACH_VEC_ELT (group->stores, i, info)
> +	total_new[0] += count_multiple_uses (info);
> +    }
> +
>    if (!allow_unaligned_load)
>      for (int i = 0; i < 2; ++i)
>        if (group->load_align[i])
> @@ -1524,7 +1614,10 @@ split_group (merged_store_group *group,
>  	  if (info
>  	      && info->bitpos >= try_bitpos
>  	      && info->bitpos + info->bitsize <= try_bitpos + try_size)
> -	    store->orig = true;
> +	    {
> +	      store->orig = true;
> +	      any_orig = true;
> +	    }
>  	  split_stores->safe_push (store);
>  	}
>  
> @@ -1532,6 +1625,37 @@ split_group (merged_store_group *group,
>        size -= try_size;
>      }
>  
> +  if (total_orig)
> +    {
> +      /* If we are reusing some original stores and any of the
> +	 original SSA_NAMEs had multiple uses, we need to subtract
> +	 those now before we add the new ones.  */
> +      if (total_new[0] && any_orig)
> +	{
> +	  unsigned int i;
> +	  struct split_store *store;
> +	  FOR_EACH_VEC_ELT (*split_stores, i, store)
> +	    if (store->orig)
> +	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
> +	}
> +      total_new[0] += ret; /* The new store.  */
> +      store_immediate_info *info = group->stores[0];
> +      if (info->ops[0].base_addr)
> +	total_new[0] += ret * (1 + info->ops[0].bit_not_p);
> +      if (info->ops[1].base_addr)
> +	total_new[0] += ret * (1 + info->ops[1].bit_not_p);
> +      switch (info->rhs_code)
> +	{
> +	case BIT_AND_EXPR:
> +	case BIT_IOR_EXPR:
> +	case BIT_XOR_EXPR:
> +	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
> +	  break;
> +	default:
> +	  break;
> +	}
> +    }
> +
>    return ret;
>  }
>  
> @@ -1564,26 +1688,36 @@ imm_store_chain_info::output_merged_stor
>  	 for unaligned and how many stores we'd emit for aligned stores.
>  	 Only use unaligned stores if it allows fewer stores than aligned.  */
>        unsigned aligned_cnt
> -	= split_group (group, false, allow_unaligned_load, NULL);
> +	= split_group (group, false, allow_unaligned_load, NULL, NULL, NULL);
>        unsigned unaligned_cnt
> -	= split_group (group, true, allow_unaligned_load, NULL);
> +	= split_group (group, true, allow_unaligned_load, NULL, NULL, NULL);
>        if (aligned_cnt <= unaligned_cnt)
>  	allow_unaligned_store = false;
>      }
> +  unsigned total_orig, total_new;
>    split_group (group, allow_unaligned_store, allow_unaligned_load,
> -	       &split_stores);
> +	       &split_stores, &total_orig, &total_new);
>  
>    if (split_stores.length () >= orig_num_stmts)
>      {
>        /* We didn't manage to reduce the number of statements.  Bail out.  */
>        if (dump_file && (dump_flags & TDF_DETAILS))
> -	{
> -	  fprintf (dump_file, "Exceeded original number of stmts (%u)."
> -			      "  Not profitable to emit new sequence.\n",
> -		   orig_num_stmts);
> -	}
> +	fprintf (dump_file, "Exceeded original number of stmts (%u)."
> +			    "  Not profitable to emit new sequence.\n",
> +		 orig_num_stmts);
>        return false;
>      }
> +  if (total_orig <= total_new)
> +    {
> +      /* If the estimated number of new statements is not smaller than
> +	 the estimated number of original statements, bail out too.  */
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +	fprintf (dump_file, "Estimated number of original stmts (%u)"
> +			    " not larger than estimated number of new"
> +			    " stmts (%u).\n",
> +		 total_orig, total_new);
> +      return false;
> +    }
>  
>    gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
>    gimple_seq seq = NULL;
> @@ -2096,7 +2229,6 @@ handled_load (gimple *stmt, store_operan
>      {
>        tree rhs1 = gimple_assign_rhs1 (stmt);
>        if (TREE_CODE (rhs1) == SSA_NAME
> -	  && has_single_use (rhs1)
>  	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
>  			   bitregion_start, bitregion_end))
>  	{
> @@ -2159,7 +2291,7 @@ pass_store_merging::process_store (gimpl
>        rhs_code = INTEGER_CST;
>        ops[0].val = rhs;
>      }
> -  else if (TREE_CODE (rhs) != SSA_NAME || !has_single_use (rhs))
> +  else if (TREE_CODE (rhs) != SSA_NAME)
>      invalid = true;
>    else
>      {
> @@ -2179,7 +2311,7 @@ pass_store_merging::process_store (gimpl
>  	    rhs1 = gimple_assign_rhs1 (def_stmt);
>  	    rhs2 = gimple_assign_rhs2 (def_stmt);
>  	    invalid = true;
> -	    if (TREE_CODE (rhs1) != SSA_NAME || !has_single_use (rhs1))
> +	    if (TREE_CODE (rhs1) != SSA_NAME)
>  	      break;
>  	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
>  	    if (!is_gimple_assign (def_stmt1)
> @@ -2188,7 +2320,7 @@ pass_store_merging::process_store (gimpl
>  	      break;
>  	    if (rhs_valid_for_store_merging_p (rhs2))
>  	      ops[1].val = rhs2;
> -	    else if (TREE_CODE (rhs2) != SSA_NAME || !has_single_use (rhs2))
> +	    else if (TREE_CODE (rhs2) != SSA_NAME)
>  	      break;
>  	    else
>  	      {
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

