This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Optimize vector init constructor


For a vector init constructor such as:

---
typedef float __v4sf __attribute__ ((__vector_size__ (16)));

__v4sf
foo (__v4sf x, float f)
{
  __v4sf y = { f, x[1], x[2], x[3] };
  return y;
}
---

we can optimize the vector init constructor into a vector copy or permute
followed by a single scalar insert:

  __v4sf D.1912;
  __v4sf D.1913;
  __v4sf D.1914;
  __v4sf y;

  x.0_1 = x;
  D.1912 = x.0_1;
  _2 = D.1912;
  D.1913 = _2;
  BIT_FIELD_REF <D.1913, 32, 0> = f;
  y = D.1913;
  D.1914 = y;
  return D.1914;

instead of

  __v4sf D.1962;
  __v4sf y;

  _1 = BIT_FIELD_REF <x, 32, 32>;
  _2 = BIT_FIELD_REF <x, 32, 64>;
  _3 = BIT_FIELD_REF <x, 32, 96>;
  y = {f, _1, _2, _3};
  D.1962 = y;
  return D.1962;

gcc/

	PR tree-optimization/88828
	* gimplify.c (gimplify_init_constructor): Optimize vector init
	constructor with vector copy or permute followed by a single
	scalar insert.

gcc/testsuite/

	PR tree-optimization/88828
	* gcc.target/i386/pr88828-1.c: New test.
	* gcc.target/i386/pr88828-2.c: Likewise.
	* gcc.target/i386/pr88828-3a.c: Likewise.
	* gcc.target/i386/pr88828-3b.c: Likewise.
	* gcc.target/i386/pr88828-4a.c: Likewise.
	* gcc.target/i386/pr88828-4b.c: Likewise.
	* gcc.target/i386/pr88828-5a.c: Likewise.
	* gcc.target/i386/pr88828-5b.c: Likewise.
	* gcc.target/i386/pr88828-6a.c: Likewise.
	* gcc.target/i386/pr88828-6b.c: Likewise.
---
 gcc/gimplify.c                             | 176 +++++++++++++++++++--
 gcc/testsuite/gcc.target/i386/pr88828-1.c  |  16 ++
 gcc/testsuite/gcc.target/i386/pr88828-2.c  |  17 ++
 gcc/testsuite/gcc.target/i386/pr88828-3a.c |  16 ++
 gcc/testsuite/gcc.target/i386/pr88828-3b.c |  18 +++
 gcc/testsuite/gcc.target/i386/pr88828-4a.c |  17 ++
 gcc/testsuite/gcc.target/i386/pr88828-4b.c |  20 +++
 gcc/testsuite/gcc.target/i386/pr88828-5a.c |  16 ++
 gcc/testsuite/gcc.target/i386/pr88828-5b.c |  18 +++
 gcc/testsuite/gcc.target/i386/pr88828-6a.c |  17 ++
 gcc/testsuite/gcc.target/i386/pr88828-6b.c |  19 +++
 11 files changed, 336 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-5b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-6a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr88828-6b.c

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 983635ba21f..893a4311f9e 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -5082,22 +5082,170 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	    TREE_CONSTANT (ctor) = 0;
 	  }
 
-	/* Vector types use CONSTRUCTOR all the way through gimple
-	   compilation as a general initializer.  */
-	FOR_EACH_VEC_SAFE_ELT (elts, ix, ce)
+	tree rhs_vector = NULL;
+	/* The vector element to replace scalar elements, which
+	   will be overridden by scalar insert.  */
+	tree vector_element = NULL;
+	/* The single scalar element.  */
+	tree scalar_element = NULL;
+	unsigned int scalar_idx = 0;
+	enum { unknown, copy, permute, init } operation = unknown;
+	bool insert = false;
+
+	/* Check if we can generate vector copy or permute followed by
+	   a single scalar insert.  */
+	if (TYPE_VECTOR_SUBPARTS (type).is_constant ())
 	  {
-	    enum gimplify_status tret;
-	    tret = gimplify_expr (&ce->value, pre_p, post_p, is_gimple_val,
-				  fb_rvalue);
-	    if (tret == GS_ERROR)
-	      ret = GS_ERROR;
-	    else if (TREE_STATIC (ctor)
-		     && !initializer_constant_valid_p (ce->value,
-						       TREE_TYPE (ce->value)))
-	      TREE_STATIC (ctor) = 0;
+	    /* If all RHS vector elements come from the same vector,
+	       we can use permute.  If all RHS vector elements come
+	       from the same vector in the same order, we can use
+	       copy.  */
+	    unsigned int nunits
+	      = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+	    unsigned int nscalars = 0;
+	    unsigned int nvectors = 0;
+	    operation = unknown;
+	    FOR_EACH_VEC_SAFE_ELT (elts, ix, ce)
+	      if (TREE_CODE (ce->value) == ARRAY_REF
+		  || TREE_CODE (ce->value) == ARRAY_RANGE_REF)
+		{
+		  if (!vector_element)
+		    vector_element = ce->value;
+		  /* Get the vector index.  */
+		  tree idx = TREE_OPERAND (ce->value, 1);
+		  if (TREE_CODE (idx) == INTEGER_CST)
+		    {
+		      /* Get the RHS vector.  */
+		      tree r = ce->value;
+		      while (handled_component_p (r))
+			r = TREE_OPERAND (r, 0);
+		      if (type == TREE_TYPE (r))
+			{
+			  /* The RHS vector has the same type as
+			     LHS.  */
+			  if (rhs_vector == NULL)
+			    rhs_vector = r;
+
+			  /* Check if all RHS vector elements come
+			     from the same vector.  */
+			  if (rhs_vector == r)
+			    {
+			      nvectors++;
+			      if (TREE_INT_CST_LOW (idx) == ix
+				  && (operation == unknown
+				      || operation == copy))
+				operation = copy;
+			      else
+				operation = permute;
+			      continue;
+			    }
+			}
+		    }
+
+		  /* Otherwise, use vector init.  */
+		  break;
+		}
+	      else if (TREE_CODE (TYPE_SIZE (TREE_TYPE (ce->value)))
+		       == INTEGER_CST)
+		{
+		  /* Only allow one single scalar insert.  */
+		  if (nscalars != 0)
+		    break;
+		  nscalars = 1;
+		  insert = true;
+		  scalar_idx = ix;
+		  scalar_element = ce->value;
+		}
+
+	    /* Allow a single scalar insert with vector copy or
+	       vector permute.  Vector copy without insert is OK.  */
+	    if (nunits != (nscalars + nvectors)
+		|| (nscalars == 0 && operation != copy))
+	      operation = unknown;
+	  }
+
+	if (operation == unknown)
+	  {
+	    /* Default to the regular vector init constructor.  */
+	    operation = init;
+	    insert = false;
+	  }
+
+	if (operation == copy)
+	  {
+	    /* Generate a vector copy.  */
+	    tree var = create_tmp_var (type);
+	    if (gimplify_expr (&rhs_vector, pre_p, post_p,
+			       is_gimple_val, fb_rvalue) == GS_ERROR)
+	      {
+		ret = GS_ERROR;
+		break;
+	      }
+	    gassign *init = gimple_build_assign (var, rhs_vector);
+	    gimple_seq_add_stmt (pre_p, init);
+	    if (gimplify_expr (&var, pre_p, post_p, is_gimple_val,
+			       fb_rvalue) == GS_ERROR)
+	      {
+		ret = GS_ERROR;
+		break;
+	      }
+	    /* Replace RHS with the vector copy.  */
+	    if (!is_gimple_reg (TREE_OPERAND (*expr_p, 0)))
+	      TREE_OPERAND (*expr_p, 1) = get_formal_tmp_var (var, pre_p);
+	    else
+	      TREE_OPERAND (*expr_p, 1) = var;
+	  }
+	else
+	  {
+	    /* Prepare for vector permute by replacing the scalar
+	       element with the vector one.  */
+	    if (operation == permute)
+	      (elts->address())[scalar_idx].value = vector_element;
+
+	    /* Vector types use CONSTRUCTOR all the way through gimple
+	       compilation as a general initializer.  */
+	    FOR_EACH_VEC_SAFE_ELT (elts, ix, ce)
+	      {
+		enum gimplify_status tret;
+		tret = gimplify_expr (&ce->value, pre_p, post_p,
+				      is_gimple_val,
+				      fb_rvalue);
+		if (tret == GS_ERROR)
+		  ret = GS_ERROR;
+		else if (TREE_STATIC (ctor)
+			 && !initializer_constant_valid_p (ce->value,
+							   TREE_TYPE (ce->value)))
+		  TREE_STATIC (ctor) = 0;
+	      }
+	    if (!is_gimple_reg (TREE_OPERAND (*expr_p, 0)))
+	      TREE_OPERAND (*expr_p, 1) = get_formal_tmp_var (ctor, pre_p);
+	  }
+
+	if (insert)
+	  {
+	    /* Generate a single scalar insert after vector copy or
+	       permute.  */
+	    tree rhs = TREE_OPERAND (*expr_p, 1);
+	    tree var = create_tmp_var (type);
+	    gassign *init = gimple_build_assign (var, rhs);
+	    gimple_seq_add_stmt (pre_p, init);
+	    if (gimplify_expr (&scalar_element, pre_p, post_p,
+			       is_gimple_val, fb_rvalue) == GS_ERROR)
+	      {
+		ret = GS_ERROR;
+		break;
+	      }
+	    tree scalar_type = TREE_TYPE (scalar_element);
+	    tree scalar_size = TYPE_SIZE (scalar_type);
+	    tree bitpos = bitsize_int (scalar_idx
+				       * TREE_INT_CST_LOW (scalar_size));
+	    tree ref = build3_loc (EXPR_LOCATION (rhs), BIT_FIELD_REF,
+				   scalar_type, var, scalar_size,
+				   bitpos);
+	    init = gimple_build_assign (ref, scalar_element);
+	    gimplify_seq_add_stmt (pre_p, init);
+	    TREE_OPERAND (*expr_p, 1) = var;
 	  }
-	if (!is_gimple_reg (TREE_OPERAND (*expr_p, 0)))
-	  TREE_OPERAND (*expr_p, 1) = get_formal_tmp_var (ctor, pre_p);
       }
       break;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1.c b/gcc/testsuite/gcc.target/i386/pr88828-1.c
new file mode 100644
index 00000000000..4ef1feab389
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { f, x[1], x[2], x[3] };
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-2.c b/gcc/testsuite/gcc.target/i386/pr88828-2.c
new file mode 100644
index 00000000000..6dc482b6f4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = x;
+  y[0] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-3a.c b/gcc/testsuite/gcc.target/i386/pr88828-3a.c
new file mode 100644
index 00000000000..97eb8e7162a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-3a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { f, x[0], x[2], x[3] };
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-3b.c b/gcc/testsuite/gcc.target/i386/pr88828-3b.c
new file mode 100644
index 00000000000..ab2ba730716
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-3b.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { f, x[0], x[2], x[3] };
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4a.c b/gcc/testsuite/gcc.target/i386/pr88828-4a.c
new file mode 100644
index 00000000000..a54689be701
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-4a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[1] };
+  y[0] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4b.c b/gcc/testsuite/gcc.target/i386/pr88828-4b.c
new file mode 100644
index 00000000000..0c3a1024d93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-4b.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[1] };
+  y[0] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5a.c b/gcc/testsuite/gcc.target/i386/pr88828-5a.c
new file mode 100644
index 00000000000..534808d3cd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-5a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 2 } } */
+/* { dg-final { scan-assembler-times "movaps" 1 } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], f };
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5b.c b/gcc/testsuite/gcc.target/i386/pr88828-5b.c
new file mode 100644
index 00000000000..aebea790979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-5b.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], f };
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-6a.c b/gcc/testsuite/gcc.target/i386/pr88828-6a.c
new file mode 100644
index 00000000000..d43a36d9137
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-6a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 2 } } */
+/* { dg-final { scan-assembler-times "movaps" 1 } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[0] };
+  y[3] = f;
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr88828-6b.c b/gcc/testsuite/gcc.target/i386/pr88828-6b.c
new file mode 100644
index 00000000000..6856fe6500e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88828-6b.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo (__v4sf x, float f)
+{
+  __v4sf y = { x[0], x[2], x[3], x[0] };
+  y[3] = f;
+  return y;
+}
-- 
2.20.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]