This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH,rs6000] Replace swap of a loaded vector constant with load of a swapped vector constant


Hi Kelvin,

On Fri, Sep 15, 2017 at 03:04:52PM -0600, Kelvin Nilsen wrote:
> On Power8 little endian, two instructions are needed to load from the
> natural in-memory representation of a vector into a vector register: a
> load followed by a swap.  When the vector value to be loaded is a
> constant, more efficient code can be achieved by swapping the
> representation of the constant in memory so that only a load instruction
> is required.

I'll leave the review of the actual swaps part to Bill...  But some
comments:

> --- gcc/config/rs6000/rs6000-p8swap.c	(revision 252768)
> +++ gcc/config/rs6000/rs6000-p8swap.c	(working copy)
> @@ -342,7 +342,8 @@ const_load_sequence_p (swap_web_entry *insn_entry,
>    FOR_EACH_INSN_INFO_USE (use, insn_info)
>      {
>        struct df_link *def_link = DF_REF_CHAIN (use);
> -      if (!def_link || def_link->next)
> +      if (!def_link || !def_link->ref || DF_REF_IS_ARTIFICIAL (def_link->ref)
> +	  || def_link->next)
>  	return false;

You probably should adjust the comment before this a bit:
  /* Find the unique use in the swap and locate its def.  If the def
     isn't unique, punt.  */
no longer says what this does.

> @@ -370,6 +373,14 @@ const_load_sequence_p (swap_web_entry *insn_entry,
>  	  if (!base_def_link || base_def_link->next)
>  	    return false;
>  
> +	  /* Constants held on the stack are not "true" constants
> +	   * because their values are not part of the static load
> +	   * image.  If this constant's base reference is a stack
> +	   * or frame pointer, it is seen as an artificial
> +	   * reference. */

No leading asterisks in block comments.

> @@ -385,6 +396,25 @@ const_load_sequence_p (swap_web_entry *insn_entry,
>  	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
>  	  if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
>  	    return false;
> +	  else
> +	    {
> +	      /* FIXME: The conditions under which
> +	       *  ((GET_CODE (const_vector) == SYMBOL_REF) &&
> +	       *   !CONSTANT_POOL_ADDRESS_P (const_vector))
> +	       * are not well understood.  This code prevents
> +	       * an internal compiler error which will occur in
> +	       * replace_swapped_load_constant () if we were to return
> +	       * true.  Some day, we should figure out how to properly
> +	       * handle this condition in
> +	       * replace_swapped_load_constant () and then we can
> +	       * remove this special test.  */
> +	      rtx const_vector = get_pool_constant (base);
> +	      if (GET_CODE (const_vector) == SYMBOL_REF)
> +		{
> +		  if (!CONSTANT_POOL_ADDRESS_P (const_vector))
> +		    return false;
> +		}
> +	    }
>  	}
>      }
>    return true;

It would be good to understand what this is about.  Some day :-)

> @@ -1281,6 +1311,189 @@ replace_swap_with_copy (swap_web_entry *insn_entry
>    insn->set_deleted ();
>  }
>  
> +/* Given that swap_insn represents a swap of a load of a constant
> +   vector value, replace with a single instruction that loads a
> +   swapped variant of the original constant. 

(Trailing space).

> +static void
> +replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn)
> +{
> +  /* Find the load.  */
> +  struct df_insn_info *insn_info = DF_INSN_INFO_GET (swap_insn);
> +  rtx_insn *load_insn = 0;
> +  df_ref use;
> +
> +  FOR_EACH_INSN_INFO_USE (use, insn_info)
> +    {
> +      struct df_link *def_link = DF_REF_CHAIN (use);
> +      gcc_assert (def_link && !def_link->next);
> +      load_insn = DF_REF_INSN (def_link->ref);
> +      break;
> +    }
> +  gcc_assert (load_insn);

A loop where you always break after the first iteration?  You probably
can write this simpler without a loop?  Or is this normal idiom?

> +  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
> +     to set tocrel_base; otherwise it would be unnecessary as we've
> +     already established it will return true.  */
> +  rtx base, offset;
> +  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
> +  const_rtx tocrel_base;
> +  /* There is an extra level of indirection for small/large code models.  */
> +  if (GET_CODE (tocrel_expr) == MEM)
> +    tocrel_expr = XEXP (tocrel_expr, 0);
> +  if (!toc_relative_expr_p (tocrel_expr, false, &tocrel_base, NULL))
> +    gcc_unreachable ();
> +  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
> +  rtx const_vector = get_pool_constant (base);
> +  /* With the extra indirection, get_pool_constant will produce the
> +     real constant from the reg_equal expression, so get the real
> +     constant.  */
> +  if (GET_CODE (const_vector) == SYMBOL_REF)
> +    const_vector = get_pool_constant (const_vector);
> +  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
> +
> +  rtx new_mem;
> +  enum machine_mode mode = GET_MODE (const_vector);
> +  /* Create an adjusted constant from the original constant.  */
> +
> +  if (mode == V1TImode)
> +    /* Leave this code as is.  */
> +    return;
> +  else if (mode == V16QImode)
> +    {
> +      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (16));
> +      int i;
> +      for (i = 0; i < 16; i++)
> +	XVECEXP (vals, 0, ((i+8) % 16)) = XVECEXP (const_vector, 0, i);
> +      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +      new_mem = force_const_mem (mode, new_const_vector);
> +    }
> +  else if ((mode == V4SImode) || (mode == V4SFmode))
> +    {
> +      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (4));
> +      int i;
> +      for (i = 0; i < 4; i++)
> +	XVECEXP (vals, 0, ((i+2) % 4)) = XVECEXP (const_vector, 0, i);
> +      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +      new_mem = force_const_mem (mode, new_const_vector);
> +    }
> +  else if ((mode == V8HImode)
> +#ifdef HAVE_V8HFmode
> +	   || (mode == V8HFmode)
> +#endif
> +	   )
> +    {
> +      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8));
> +      int i;
> +      for (i = 0; i < 8; i++)
> +	XVECEXP (vals, 0, ((i+4) % 8)) = XVECEXP (const_vector, 0, i);
> +      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +      new_mem = force_const_mem (mode, new_const_vector);
> +    }
> +  else if ((mode == V2DImode) || (mode == V2DFmode))
> +    {
> +      rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (2));
> +      int i;
> +      /* Sometimes, load of a V1TImode vector is represented as a load
> +	 of two double words with a swap on top of it.  */
> +      for (i = 0; i < 2; i++)
> +	XVECEXP (vals, 0, ((i+1) % 2)) = XVECEXP (const_vector, 0, i);
> +      rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +      new_mem = force_const_mem (mode, new_const_vector);
> +    }
> +  else
> +    {
> +      /* We do not expect other modes to be constant-load-swapped.  */
> +      gcc_assert (0);

gcc_unreachable ();

> +    }
> +
> +  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
> +     can't recognize.  Force the SYMBOL_REF into a register.  */
> +  if (!REG_P (XEXP (new_mem, 0))) {
> +    rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
> +    XEXP (new_mem, 0) = base_reg;
> +    /* Move the newly created insn ahead of the load insn.  */
> +    rtx_insn *force_insn = get_last_insn ();
> +    remove_insn (force_insn);
> +    rtx_insn *before_load_insn = PREV_INSN (load_insn);
> +    add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
> +    df_insn_rescan (before_load_insn);
> +    df_insn_rescan (force_insn);
> +  }

Maybe do something with whitelines here to make it easier to read?  The
whole function I mean.

> +  {

Why the new block here?

> +    unsigned e = get_max_uid (), i;

Don't mix initialiased and uninitialised vars like this; you can just
declare "i" in the loop below.

> +    for (i = 0; i < e; ++i)

> +      {

And this block isn't needed either :-)

> +	if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load)
> +	  {
> +	    insn = pass2_insn_entry[i].insn;
> +	    if (const_load_sequence_p (pass2_insn_entry, insn))
> +	      replace_swapped_load_constant (pass2_insn_entry, insn);
> +	  }
> +      }
> +  }

So you'll get

  unsigned e = get_max_uid ();
  for (unsigned i = 0; i < e; ++i)
    if (pass2_insn_entry[i].is_swap && !pass2_insn_entry[i].is_load)
      {
	insn = pass2_insn_entry[i].insn;
	if (const_load_sequence_p (pass2_insn_entry, insn))
	  replace_swapped_load_constant (pass2_insn_entry, insn);
      }

(there is no real benefit using a temp var for "e" here probably, I
suspect the compiler can figure out it doesn't change.  But not sure).

> --- gcc/testsuite/gcc.target/powerpc/swaps-p8-28.c	(revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/swaps-p8-28.c	(working copy)
> @@ -0,0 +1,30 @@
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-do run { target { powerpc64*-*-* } } } */

powerpc*-*-*  or  powerpc*-*-* && lp64

> --- gcc/testsuite/gcc.target/powerpc/swps-p8-30.c	(revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/swps-p8-30.c	(working copy)
> @@ -0,0 +1,36 @@
> +/* This file's name was changed from swaps-p8-30.c so that I could
> +   search the assembler for "not swap", since swap is an alias for
> +   xxpermdi.  With the original file name, the assembler search would
> +   get a false positive on the name of the file.  */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-do compile { target { powerpc64le-*-* } } } */

This isn't really the correct way for testing for LE, but we don't have
anything better currently.  Okay.


Some of those comments apply to more than one spot.  Please adjust the
code to Bill's suggrestions (if any) too, and repost.

Looks like a nice improvement :-)


Segher


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]