[PATCH] Use two source permute for vector initialization (PR 85692, take 2)

Richard Biener rguenther@suse.de
Fri May 11 08:05:00 GMT 2018


On Thu, 10 May 2018, Jakub Jelinek wrote:

> On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote:
> > > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator
> > > > *gsi)> 
> > > >    elem_type = TREE_TYPE (type);
> > > >    elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > > > 
> > > > -  vec_perm_builder sel (nelts, nelts, 1);
> > > > -  orig = NULL;
> > > > +  vec_perm_builder sel (nelts, 2, nelts);
> > > 
> > > Why this change?  I admit the vec_parm_builder arguments are confusing, but
> > > I think the second times third is the number of how many indices are being
> > > pushed into the vector, so I think (nelts, nelts, 1) is right.
> > > 
> > I had the impression it was what was selected from. In any case, I changed it 
> > because without I get crash when vec_perm_indices is created later with a 
> > possible nparms of 2.
> 
> The documentation is apparently in vector-builder.h:
>    This class is a wrapper around auto_vec<T> for building vectors of T.
>    It aims to encode each vector as npatterns interleaved patterns,
>    where each pattern represents a sequence:
> 
>      { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
> 
>    The first three elements in each pattern provide enough information
>    to derive the other elements.  If all patterns have a STEP of zero,
>    we only need to encode the first two elements in each pattern.
>    If BASE1 is also equal to BASE0 for all patterns, we only need to
>    encode the first element in each pattern.  The number of encoded
>    elements per pattern is given by nelts_per_pattern.
> 
>    The class can be used in two ways:
> 
>    1. It can be used to build a full image of the vector, which is then
>       canonicalized by finalize ().  In this case npatterns is initially
>       the number of elements in the vector and nelts_per_pattern is
>       initially 1.
> 
>    2. It can be used to build a vector that already has a known encoding.
>       This is preferred since it is more efficient and copes with
>       variable-length vectors.  finalize () then canonicalizes the encoding
>       to a simpler form if possible.
> 
> As the vector is constant width and we are building the full image of the
> vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the
> finalization can perhaps change it to something more compact.
> 
> > > (and sorry for missing your patch first, the PR wasn't ASSIGNED and there
> > > was no link to gcc-patches for it).
> > > 
> > It is okay. You are welcome to take it over. I am not a regular gcc 
> > contributor and thus not well-versed in the details, only the basic logic of 
> > how things work.
> 
> Ok, here is my version of the patch.  Bootstrapped/regtested on x86_64-linux
> and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2018-05-10  Allan Sandfeld Jensen  <allan.jensen@qt.io>
> 	    Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/85692
> 	* tree-ssa-forwprop.c (simplify_vector_constructor): Try two
> 	source permute as well.
> 
> 	* gcc.target/i386/pr85692.c: New test.
> 
> --- gcc/tree-ssa-forwprop.c.jj	2018-05-08 18:16:36.866614130 +0200
> +++ gcc/tree-ssa-forwprop.c	2018-05-09 20:44:32.621900540 +0200
> @@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt
>  {
>    gimple *stmt = gsi_stmt (*gsi);
>    gimple *def_stmt;
> -  tree op, op2, orig, type, elem_type;
> +  tree op, op2, orig[2], type, elem_type;
>    unsigned elem_size, i;
>    unsigned HOST_WIDE_INT nelts;
>    enum tree_code code, conv_code;
> @@ -2023,7 +2023,8 @@ simplify_vector_constructor (gimple_stmt
>    elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
>  
>    vec_perm_builder sel (nelts, nelts, 1);
> -  orig = NULL;
> +  orig[0] = NULL;
> +  orig[1] = NULL;
>    conv_code = ERROR_MARK;
>    maybe_ident = true;
>    FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
> @@ -2063,25 +2064,35 @@ simplify_vector_constructor (gimple_stmt
>  	return false;
>        op1 = gimple_assign_rhs1 (def_stmt);
>        ref = TREE_OPERAND (op1, 0);
> -      if (orig)
> +      unsigned int j;
> +      for (j = 0; j < 2; ++j)
>  	{
> -	  if (ref != orig)
> -	    return false;
> -	}
> -      else
> -	{
> -	  if (TREE_CODE (ref) != SSA_NAME)
> -	    return false;
> -	  if (! VECTOR_TYPE_P (TREE_TYPE (ref))
> -	      || ! useless_type_conversion_p (TREE_TYPE (op1),
> -					      TREE_TYPE (TREE_TYPE (ref))))
> -	    return false;
> -	  orig = ref;
> +	  if (!orig[j])
> +	    {
> +	      if (TREE_CODE (ref) != SSA_NAME)
> +		return false;
> +	      if (! VECTOR_TYPE_P (TREE_TYPE (ref))
> +		  || ! useless_type_conversion_p (TREE_TYPE (op1),
> +						  TREE_TYPE (TREE_TYPE (ref))))
> +		return false;
> +	      if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
> +						   TREE_TYPE (ref)))
> +		return false;
> +	      orig[j] = ref;
> +	      break;
> +	    }
> +	  else if (ref == orig[j])
> +	    break;
>  	}
> +      if (j == 2)
> +	return false;
> +
>        unsigned int elt;
>        if (maybe_ne (bit_field_size (op1), elem_size)
>  	  || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt))
>  	return false;
> +      if (j)
> +	elt += nelts;
>        if (elt != i)
>  	maybe_ident = false;
>        sel.quick_push (elt);
> @@ -2089,14 +2100,15 @@ simplify_vector_constructor (gimple_stmt
>    if (i < nelts)
>      return false;
>  
> -  if (! VECTOR_TYPE_P (TREE_TYPE (orig))
> +  if (! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
>        || maybe_ne (TYPE_VECTOR_SUBPARTS (type),
> -		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig))))
> +		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
>      return false;
>  
>    tree tem;
>    if (conv_code != ERROR_MARK
> -      && (! supportable_convert_operation (conv_code, type, TREE_TYPE (orig),
> +      && (! supportable_convert_operation (conv_code, type,
> +					   TREE_TYPE (orig[0]),
>  					   &tem, &conv_code)
>  	  || conv_code == CALL_EXPR))
>      return false;
> @@ -2104,16 +2116,16 @@ simplify_vector_constructor (gimple_stmt
>    if (maybe_ident)
>      {
>        if (conv_code == ERROR_MARK)
> -	gimple_assign_set_rhs_from_tree (gsi, orig);
> +	gimple_assign_set_rhs_from_tree (gsi, orig[0]);
>        else
> -	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
> +	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
>  					NULL_TREE, NULL_TREE);
>      }
>    else
>      {
>        tree mask_type;
>  
> -      vec_perm_indices indices (sel, 1, nelts);
> +      vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts);
>        if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
>  	return false;
>        mask_type
> @@ -2124,16 +2136,19 @@ simplify_vector_constructor (gimple_stmt
>  		       GET_MODE_SIZE (TYPE_MODE (type))))
>  	return false;
>        op2 = vec_perm_indices_to_tree (mask_type, indices);
> +      if (!orig[1])
> +	orig[1] = orig[0];
>        if (conv_code == ERROR_MARK)
> -	gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig, orig, op2);
> +	gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
> +					orig[1], op2);
>        else
>  	{
>  	  gimple *perm
> -	    = gimple_build_assign (make_ssa_name (TREE_TYPE (orig)),
> -				   VEC_PERM_EXPR, orig, orig, op2);
> -	  orig = gimple_assign_lhs (perm);
> +	    = gimple_build_assign (make_ssa_name (TREE_TYPE (orig[0])),
> +				   VEC_PERM_EXPR, orig[0], orig[1], op2);
> +	  orig[0] = gimple_assign_lhs (perm);
>  	  gsi_insert_before (gsi, perm, GSI_SAME_STMT);
> -	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig,
> +	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
>  					  NULL_TREE, NULL_TREE);
>  	}
>      }
> --- gcc/testsuite/gcc.target/i386/pr85692.c.jj	2018-05-09 20:44:37.153904405 +0200
> +++ gcc/testsuite/gcc.target/i386/pr85692.c	2018-05-09 20:44:37.153904405 +0200
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +/* { dg-final { scan-assembler "unpcklps" } } */
> +/* { dg-final { scan-assembler "blendps" } } */
> +/* { dg-final { scan-assembler-not "shufps" } } */
> +/* { dg-final { scan-assembler-not "unpckhps" } } */
> +
> +typedef float v4sf __attribute__ ((vector_size (16)));
> +
> +v4sf unpcklps(v4sf a, v4sf b)
> +{
> +    return (v4sf){a[0],b[0],a[1],b[1]};
> +}
> +
> +v4sf blendps(v4sf a, v4sf b)
> +{
> +    return (v4sf){a[0],b[1],a[2],b[3]};
> +}
> 
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)



More information about the Gcc-patches mailing list