[PATCH] middle-end: Prefer no RMW in __builtin_clear_padding implementation where possible

Richard Biener rguenther@suse.de
Tue Nov 24 09:32:45 GMT 2020


On Tue, 24 Nov 2020, Jakub Jelinek wrote:

> Hi!
> 
> Currently the __builtin_clear_padding expansion code emits no code for
> full words that don't have any padding bits, and most of the time if
> the only padding bytes are from the start of the word it attempts to merge
> them with previous padding store (via {}) or if the only padding bytes are
> from the end of the word, it attempts to merge it with following padding
> bytes.  For everything else it was using a RMW, except when it found
> an aligned char/short/int covering all the padding bytes and all those
> padding bytes were all ones in that store.
> 
> The following patch changes it, such that we only use RMW if the padding has
> any bytes which have some padding and some non-padding bits (i.e. bitfields
> are involved), often it is the same amount of instructions in the end and
> avoids being thread-unsafe unless necessary (and avoids having to wait for
> the reads to make it into the CPU).  So, if there are no bitfields,
> the function will just store some zero bytes, shorts, ints, long longs etc.
> where needed.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2020-11-24  Jakub Jelinek  <jakub@redhat.com>
> 
> 	* gimple-fold.c (clear_padding_flush): If a word contains only 0
> 	or 0xff bytes of padding other than all set, all clear, all set
> 	followed by all clear or all clear followed by all set, don't emit
> 	a RMW operation on the whole word or parts of it, but instead
> 	clear the individual bytes of padding.  For paddings of one byte
> 	size, don't use char[1] and {}, but instead just char and 0.
> 
> --- gcc/gimple-fold.c.jj	2020-11-23 11:59:18.790279409 +0100
> +++ gcc/gimple-fold.c	2020-11-23 15:48:36.723510615 +0100
> @@ -4033,7 +4033,9 @@ clear_padding_flush (clear_padding_struc
>      {
>        size_t nonzero_first = wordsize;
>        size_t nonzero_last = 0;
> -      bool all_ones = true;
> +      size_t zero_first = wordsize;
> +      size_t zero_last = 0;
> +      bool all_ones = true, bytes_only = true;
>        if ((unsigned HOST_WIDE_INT) (buf->off + i + wordsize)
>  	  > (unsigned HOST_WIDE_INT) buf->sz)
>  	{
> @@ -4055,9 +4057,19 @@ clear_padding_flush (clear_padding_struc
>  		all_ones = false;
>  	      nonzero_last = j + 1 - i;
>  	    }
> +	  else
> +	    {
> +	      if (zero_first == wordsize)
> +		zero_first = j - i;
> +	      zero_last = j + 1 - i;
> +	    }
>  	  if (buf->buf[j] != 0 && buf->buf[j] != (unsigned char) ~0)
> -	    all_ones = false;
> +	    {
> +	      all_ones = false;
> +	      bytes_only = false;
> +	    }
>  	}
> +      size_t padding_end = i;
>        if (padding_bytes)
>  	{
>  	  if (nonzero_first == 0
> @@ -4069,7 +4081,6 @@ clear_padding_flush (clear_padding_struc
>  	      padding_bytes += wordsize;
>  	      continue;
>  	    }
> -	  size_t padding_end = i;
>  	  if (all_ones && nonzero_first == 0)
>  	    {
>  	      padding_bytes += nonzero_last;
> @@ -4077,12 +4088,27 @@ clear_padding_flush (clear_padding_struc
>  	      nonzero_first = wordsize;
>  	      nonzero_last = 0;
>  	    }
> -	  tree atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	  else if (bytes_only && nonzero_first == 0)
> +	    {
> +	      gcc_assert (zero_first && zero_first != wordsize);
> +	      padding_bytes += zero_first;
> +	      padding_end += zero_first;
> +	    }
> +	  tree atype, src;
> +	  if (padding_bytes == 1)
> +	    {
> +	      atype = char_type_node;
> +	      src = build_zero_cst (char_type_node);
> +	    }
> +	  else
> +	    {
> +	      atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	      src = build_constructor (atype, NULL);
> +	    }
>  	  tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
>  				 build_int_cst (buf->alias_type,
>  						buf->off + padding_end
>  						- padding_bytes));
> -	  tree src = build_constructor (atype, NULL);
>  	  gimple *g = gimple_build_assign (dst, src);
>  	  gimple_set_location (g, buf->loc);
>  	  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> @@ -4099,6 +4125,45 @@ clear_padding_flush (clear_padding_struc
>  	  padding_bytes = nonzero_last - nonzero_first;
>  	  continue;
>  	}
> +      if (bytes_only)
> +	{
> +	  /* If bitfields aren't involved in this word, prefer storing
> +	     individual bytes or groups of them over performing a RMW
> +	     operation on the whole word.  */
> +	  gcc_assert (i + zero_last <= end);
> +	  for (size_t j = padding_end; j < i + zero_last; j++)
> +	    {
> +	      if (buf->buf[j])
> +		{
> +		  size_t k;
> +		  for (k = j; k < i + zero_last; k++)
> +		    if (buf->buf[k] == 0)
> +		      break;
> +		  HOST_WIDE_INT off = buf->off + j;
> +		  tree atype, src;
> +		  if (k - j == 1)
> +		    {
> +		      atype = char_type_node;
> +		      src = build_zero_cst (char_type_node);
> +		    }
> +		  else
> +		    {
> +		      atype = build_array_type_nelts (char_type_node, k - j);
> +		      src = build_constructor (atype, NULL);
> +		    }
> +		  tree dst = build2_loc (buf->loc, MEM_REF, atype,
> +					 buf->base,
> +					 build_int_cst (buf->alias_type, off));
> +		  gimple *g = gimple_build_assign (dst, src);
> +		  gimple_set_location (g, buf->loc);
> +		  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> +		  j = k;
> +		}
> +	    }
> +	  if (nonzero_last == wordsize)
> +	    padding_bytes = nonzero_last - zero_last;
> +	  continue;
> +	}
>        for (size_t eltsz = 1; eltsz <= wordsize; eltsz <<= 1)
>  	{
>  	  if (nonzero_last - nonzero_first <= eltsz
> @@ -4153,12 +4218,21 @@ clear_padding_flush (clear_padding_struc
>      {
>        if (padding_bytes)
>  	{
> -	  tree atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	  tree atype, src;
> +	  if (padding_bytes == 1)
> +	    {
> +	      atype = char_type_node;
> +	      src = build_zero_cst (char_type_node);
> +	    }
> +	  else
> +	    {
> +	      atype = build_array_type_nelts (char_type_node, padding_bytes);
> +	      src = build_constructor (atype, NULL);
> +	    }
>  	  tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base,
>  				 build_int_cst (buf->alias_type,
>  						buf->off + end
>  						- padding_bytes));
> -	  tree src = build_constructor (atype, NULL);
>  	  gimple *g = gimple_build_assign (dst, src);
>  	  gimple_set_location (g, buf->loc);
>  	  gsi_insert_before (buf->gsi, g, GSI_SAME_STMT);
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend


More information about the Gcc-patches mailing list