This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] [AArch64] PR target/71663 Improve Vector Initializtion


Hi James,

Thanks for your review and useful comments.

>> If you could try to keep one reply chain for each patch series
Will keep that in mind for sure :-)

>> Very minor, but what is wrong with:
>> int matches[16][2] = {0};
Done.

>> nummatches is unused.
Removed.

>> This search algorithm is tough to follow
Updated as per your comments.

>> Put braces round this and write it as two statements
Done.

>> Move your new code above the part-variable case.
Done.

>> c is unused.
Removed.

Bootstrapped and Regression tested on aarch64-thunder-linux.

Please review the patch and let us know if any comments or suggestions.

Thanks,
Naveen
    
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bce490f..239ba72 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11707,6 +11707,57 @@ aarch64_expand_vector_init (rtx target, rtx vals)
       return;
     }
 
+  enum insn_code icode = optab_handler (vec_set_optab, mode);
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  /* If there are only variable elements, try to optimize
+     the insertion using dup for the most common element
+     followed by insertions.  */
+
+  /* The algorithm will fill matches[*][0] with the earliest matching element,
+     and matches[X][1] with the count of duplicate elements (if X is the
+     earliest element which has duplicates).  */
+
+  if (n_var == n_elts && n_elts <= 16)
+    {
+      int matches[16][2] = {0};
+      for (int i = 0; i < n_elts; i++)
+	{
+	  for (int j = 0; j <= i; j++)
+	    {
+	      if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
+		{
+		  matches[i][0] = j;
+		  matches[j][1]++;
+		  break;
+		}
+	    }
+	}
+      int maxelement = 0;
+      int maxv = 0;
+      for (int i = 0; i < n_elts; i++)
+	if (matches[i][1] > maxv)
+	  {
+	    maxelement = i;
+	    maxv = matches[i][1];
+	  }
+
+      /* Create a duplicate of the most common element.  */
+      rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
+      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
+
+      /* Insert the rest.  */
+      for (int i = 0; i < n_elts; i++)
+	{
+	  rtx x = XVECEXP (vals, 0, i);
+	  if (matches[i][0] == maxelement)
+	    continue;
+	  x = copy_to_mode_reg (inner_mode, x);
+	  emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
+	}
+      return;
+    }
+
   /* Initialise a vector which is part-variable.  We want to first try
      to build those lanes which are constant in the most efficient way we
      can.  */
@@ -11740,10 +11791,6 @@ aarch64_expand_vector_init (rtx target, rtx vals)
     }
 
   /* Insert the variable lanes directly.  */
-
-  enum insn_code icode = optab_handler (vec_set_optab, mode);
-  gcc_assert (icode != CODE_FOR_nothing);
-
   for (int i = 0; i < n_elts; i++)
     {
       rtx x = XVECEXP (vals, 0, i);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr71663.c b/gcc/testsuite/gcc.target/aarch64/pr71663.c
new file mode 100644
index 0000000..65f368d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr71663.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#define vector __attribute__((vector_size(16)))
+
+vector float combine (float a, float b, float d)
+{
+  return (vector float) { a, b, a, d };
+}
+
+/* { dg-final { scan-assembler-not "movi\t" } } */
+/* { dg-final { scan-assembler-not "orr\t" } } */
+/* { dg-final { scan-assembler-times "ins\t" 2 } } */
+/* { dg-final { scan-assembler-times "dup\t" 1 } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]