This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[AVX]: Add ix86_expand_vector_init_interleave


I am checking this patch in to the AVX branch to optimize _mm256_set_epiXX.


H.J.
----
2008-05-18  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (ix86_expand_vector_init_interleave): New.
	(ix86_expand_vector_init_general): Use it.  Assert word_mode
	== SImode when n_words == 4.

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 135502)
+++ config/i386/i386.c	(working copy)
@@ -25171,6 +25171,117 @@ half:
     }
 }
 
+/* A subroutine of ix86_expand_vector_init_general.  Use vector
+   interleave to handle the most general case: all values variable,
+   and none identical.  */
+
+static void
+ix86_expand_vector_init_interleave (enum machine_mode mode,
+				    rtx target, rtx *ops, int n)
+{
+  enum machine_mode first_imode, second_imode, third_imode;
+  int i, j;
+  rtx op0, op1;
+  rtx (*gen_load_even) (rtx, rtx, rtx);
+  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
+  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
+  
+  switch (mode)
+    {
+    case V8HImode:
+      gen_load_even = gen_vec_setv8hi;
+      gen_interleave_first_low = gen_vec_interleave_lowv4si;
+      gen_interleave_second_low = gen_vec_interleave_lowv2di;
+      first_imode = V4SImode;
+      second_imode = V2DImode;
+      third_imode = VOIDmode;
+      break;
+    case V16QImode:
+      gen_load_even = gen_vec_setv16qi;
+      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
+      gen_interleave_second_low = gen_vec_interleave_lowv4si;
+      first_imode = V8HImode;
+      second_imode = V4SImode;
+      third_imode = V2DImode;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+     
+  for (i = 0; i < n; i++)
+    {
+      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
+      op0 = gen_reg_rtx (SImode);
+      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
+
+      /* Insert the SImode value as low element of V4SImode vector. */
+      op1 = gen_reg_rtx (V4SImode);
+      op0 = gen_rtx_VEC_MERGE (V4SImode,
+			       gen_rtx_VEC_DUPLICATE (V4SImode,
+						      op0),
+			       CONST0_RTX (V4SImode),
+			       const1_rtx);
+      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
+
+      /* Cast the V4SImode vector back to a vector in original mode.  */
+      op0 = gen_reg_rtx (mode);
+      emit_move_insn (op0, gen_lowpart (mode, op1));
+      
+      /* Load even elements into the second position.  */
+      emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+				   const1_rtx));
+
+      /* Cast vector to FIRST_IMODE vector.  */
+      ops[i] = gen_reg_rtx (first_imode);
+      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
+    }
+
+  /* Interleave low FIRST_IMODE vectors.  */
+  for (i = j = 0; i < n; i += 2, j++)
+    {
+      op0 = gen_reg_rtx (first_imode);
+      emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
+
+      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
+      ops[j] = gen_reg_rtx (second_imode);
+      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
+    }
+
+  /* Interleave low SECOND_IMODE vectors.  */
+  switch (second_imode)
+    {
+    case V4SImode:
+      for (i = j = 0; i < n / 2; i += 2, j++)
+	{
+	  op0 = gen_reg_rtx (second_imode);
+	  emit_insn ((*gen_interleave_second_low) (op0, ops[i],
+						   ops[i + 1]));
+
+	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
+	     vector.  */
+	  ops[j] = gen_reg_rtx (third_imode);
+	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
+	}
+      second_imode = V2DImode;
+      gen_interleave_second_low = gen_vec_interleave_lowv2di;
+      /* FALLTHRU */
+
+    case V2DImode:
+      op0 = gen_reg_rtx (second_imode);
+      emit_insn ((*gen_interleave_second_low) (op0, ops[0],
+					       ops[1]));
+
+      /* Cast the SECOND_IMODE vector back to a vector in original
+	 mode.  */
+      emit_insn (gen_rtx_SET (VOIDmode, target,
+			      gen_lowpart (mode, op0)));
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
    all values variable, and none identical.  */
 
@@ -25178,9 +25289,9 @@ static void
 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
 				 rtx target, rtx vals)
 {
-  rtx ops[8], op0, op1;
-  int n = 2;
-  unsigned int i, j;
+  rtx ops[32], op0, op1;
+  enum machine_mode half_mode = VOIDmode;
+  int n, i;
 
   switch (mode)
     {
@@ -25188,167 +25299,75 @@ ix86_expand_vector_init_general (bool mm
     case V2SImode:
       if (!mmx_ok && !TARGET_SSE)
 	break;
-      goto vec_concat2;
 
-    case V8SFmode:
-    case V8SImode:
-      ops[7] = XVECEXP (vals, 0, 7);
-      ops[6] = XVECEXP (vals, 0, 6);
-      ops[5] = XVECEXP (vals, 0, 5);
-      ops[4] = XVECEXP (vals, 0, 4);
-      n += 4;
-      /* FALLTHRU */
+      n = 2;
+      goto vec_concat;
 
     case V4DFmode:
     case V4DImode:
     case V4SFmode:
     case V4SImode:
-      ops[3] = XVECEXP (vals, 0, 3);
-      ops[2] = XVECEXP (vals, 0, 2);
-      n += 2;
-      /* FALLTHRU */
+      n = 4;
+      goto vec_concat;
 
     case V2DFmode:
     case V2DImode:
-vec_concat2:
-      ops[1] = XVECEXP (vals, 0, 1);
-      ops[0] = XVECEXP (vals, 0, 0);
+      n = 2;
+      goto vec_concat;
+
+    case V8SFmode:
+    case V8SImode:
+      n = 8;
+      goto vec_concat;
+
+vec_concat:
+      for (i = 0; i < n; i++)
+	ops[i] = XVECEXP (vals, 0, i);
       ix86_expand_vector_init_concat (mode, target, ops, n);
       return;
 
-    case V8HImode:
-      if (TARGET_SSE2)
-	{
-	  for (i = 0; i < 4; i++)
-	    {
-	      /* Extend the odd elment from HImode to SImode using
-		 a paradoxical SUBREG.  */
-	      op0 = gen_reg_rtx (SImode);
-	      emit_move_insn (op0, gen_lowpart (SImode,
-						XVECEXP (vals, 0,
-							 i + i)));
-
-	      /* Insert the SImode value as low element of V4SImode
-		 vector. */
-	      op1 = gen_reg_rtx (V4SImode);
-	      op0 = gen_rtx_VEC_MERGE (V4SImode,
-				       gen_rtx_VEC_DUPLICATE (V4SImode,
-							      op0),
-				       CONST0_RTX (V4SImode),
-				       const1_rtx);
-	      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
-	      /* Cast the V4SImode vector back to a V8HImode vector.  */
-	      op0 = gen_reg_rtx (mode);
-	      emit_move_insn (op0, gen_lowpart (mode, op1));
-
-	      /* Load even HI elements into the second positon.  */
-	      emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0,
-							i + i + 1),
-					  const1_rtx));
-
-	      /* Cast V8HImode vector to V4SImode vector.  */
-	      ops[i] = gen_reg_rtx (V4SImode);
-	      emit_move_insn (ops[i], gen_lowpart (V4SImode, op0));
-	    }
+    case V32QImode:
+      half_mode = V16QImode;
+      n = 32;
+      goto half;
 
-	  /* Interleave low V4SIs.  */
-	  for (i = j = 0; i < 4; i += 2, j++)
-	    {
-	      op0 = gen_reg_rtx (V4SImode);
-	      emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
-						     ops[i + 1]));
-
-	      /* Cast V4SImode vectors to V2DImode vectors.  */
-	      op1 = gen_reg_rtx (V2DImode);
-	      emit_move_insn (op1, gen_lowpart (V2DImode, op0));
-	      ops[j] = op1;
-	    }
+    case V16HImode:
+      half_mode = V8HImode;
+      n = 16;
 
-	  /* Interleave low V2DIs.  */
-	  op0 = gen_reg_rtx (V2DImode);
-	  emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
-
-	  /* Cast the V2DImode vector back to a V8HImode vector.  */
-	  emit_insn (gen_rtx_SET (VOIDmode, target,
-				  gen_lowpart (mode, op0)));
-	  return;
-	}
+half:
+      for (i = 0; i < n; i++)
+	ops[i] = XVECEXP (vals, 0, i);
+      op0 = gen_reg_rtx (half_mode);
+      op1 = gen_reg_rtx (half_mode);
+      ix86_expand_vector_init_interleave (half_mode, op0, ops,
+					  n >> 2);
+      ix86_expand_vector_init_interleave (half_mode, op1,
+					  &ops [n >> 1], n >> 2);
+      emit_insn (gen_rtx_SET (VOIDmode, target,
+			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
+      return;
 
     case V16QImode:
-      if (TARGET_SSE4_1)
-	{
-	  for (i = 0; i < 8; i++)
-	    {
-	      /* Extend the odd elment from QImode to SImode using
-		 a paradoxical SUBREG.  */
-	      op0 = gen_reg_rtx (SImode);
-	      emit_move_insn (op0, gen_lowpart (SImode,
-						XVECEXP (vals, 0,
-							 i + i)));
-
-	      /* Insert the SImode value as low element of V4SImode
-		 vector. */
-	      op1 = gen_reg_rtx (V4SImode);
-	      op0 = gen_rtx_VEC_MERGE (V4SImode,
-				       gen_rtx_VEC_DUPLICATE (V4SImode,
-							      op0),
-				       CONST0_RTX (V4SImode),
-				       const1_rtx);
-	      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
-	      /* Cast the V4SImode vector back to a V16QImode vector.  */
-	      op0 = gen_reg_rtx (mode);
-	      emit_move_insn (op0, gen_lowpart (mode, op1));
-
-	      /* Load even QI elements into the second positon.  */
-	      emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0,
-							 i + i + 1),
-					   const1_rtx));
-
-	      /* Cast V16QImode vector to V8HImode vector.  */
-	      ops[i] = gen_reg_rtx (V8HImode);
-	      emit_move_insn (ops[i], gen_lowpart (V8HImode, op0));
-	    }
+      if (!TARGET_SSE4_1)
+	break;
 
-	  /* Interleave low V8HIs.  */
-	  for (i = j = 0; i < 8; i += 2, j++)
-	    {
-	      op0 = gen_reg_rtx (V8HImode);
-	      emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i],
-						     ops[i + 1]));
-
-	      /* Cast V8HImode vector to V4SImode vector.  */
-	      op1 = gen_reg_rtx (V4SImode);
-	      emit_move_insn (op1, gen_lowpart (V4SImode, op0));
-	      ops[j] = op1;
-	    }
+      n = 16;
+      goto vec_interleave;
 
-	  /* Interleave low V4SIs.  */
-	  for (i = j = 0; i < 8 / 2; i += 2, j++)
-	    {
-	      op0 = gen_reg_rtx (V4SImode);
-	      emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
-						     ops[i + 1]));
-
-	      /* Cast V4SImode vectors to V2DImode vectors.  */
-	      op1 = gen_reg_rtx (V2DImode);
-	      emit_move_insn (op1, gen_lowpart (V2DImode, op0));
-	      ops[j] = op1;
-	    }
+    case V8HImode:
+      if (!TARGET_SSE2)
+	break;
 
-	  /* Interleave low V2DIs.  */
-	  op0 = gen_reg_rtx (V2DImode);
-	  emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
-
-	  /* Cast the V2DImode vector back to a V8HImode vector.  */
-	  emit_insn (gen_rtx_SET (VOIDmode, target,
-				  gen_lowpart (mode, op0)));
-	  return;
-	}
+      n = 8;
+      goto vec_interleave;
+
+vec_interleave:
+      for (i = 0; i < n; i++)
+	ops[i] = XVECEXP (vals, 0, i);
+      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+      return;
 
-    case V16HImode:
-    case V32QImode:
     case V4HImode:
     case V8QImode:
       break;
@@ -25401,32 +25420,12 @@ vec_concat2:
 	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
 	  emit_move_insn (target, tmp);
 	}
-      else if (n_words == 4 || n_words == 8)
+      else if (n_words == 4)
 	{
-	  rtx tmp;
-	  enum machine_mode vector_mode;
-	  
-	  switch (word_mode)
-	    {
-	    case DImode:
-	      gcc_assert (n_words == 4);
-	      vector_mode = V4DImode;
-	      break;
-	    case SImode:
-	      if (n_words == 4)
-		vector_mode = V4SImode;
-	      else
-		vector_mode = V8SImode;
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
-
-	  tmp = gen_reg_rtx (vector_mode);
-	  vals = gen_rtx_PARALLEL (vector_mode,
-				   gen_rtvec_v (n_words, words));
-	  ix86_expand_vector_init_general (false, vector_mode, tmp,
-					   vals);
+	  rtx tmp = gen_reg_rtx (V4SImode);
+	  gcc_assert (word_mode == SImode);
+	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
+	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
 	  emit_move_insn (target, gen_lowpart (mode, tmp));
 	}
       else


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]