This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



cleanup sse2 v2df part insert/extract


I need these new patterns for the misaligned load support.

I tried to keep the cleanup as minimal as possible.  It was difficult;
I think I want to rewrite the entire vector support for 4.1...  :-(

Tested on i686 and x86_64.
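
For reference, this is roughly the shape the misaligned load support
takes once these patterns exist: build the vector from two half loads.
A sketch in terms of the emmintrin.h wrappers, not part of this patch:

#include <emmintrin.h>

/* Illustration only: assemble an unaligned V2DF load from the two half
   loads that map onto the new sse2_loadlpd/sse2_loadhpd patterns.  */
static __m128d
load_unaligned_pd (const double *p)
{
  __m128d x = _mm_loadl_pd (_mm_setzero_pd (), p);  /* low  = p[0], high = 0.0 */
  return _mm_loadh_pd (x, p + 1);                   /* low kept, high = p[1]   */
}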


r~


        * config/i386/emmintrin.h (_mm_loadh_pd): Don't cast pointer arg
        to __v2si.
        (_mm_storeh_pd, _mm_loadl_pd, _mm_storel_pd): Likewise.
        * config/i386/i386.c (ix86_init_mmx_sse_builtins): Use double* or
        const double* for __builtin_ia32_loadhpd, __builtin_ia32_loadlpd,
        __builtin_ia32_storehpd, __builtin_ia32_storelpd.
        (ix86_expand_builtin): Update to match.
        (ix86_expand_vector_init): Use sse2_loadlpd.
        * config/i386/i386.md (vec_setv2df): Use sse2_loadlpd, sse2_loadhpd.
        (vec_extractv2df): Use sse2_storelpd, sse2_storehpd.
        (sse2_storehpd, sse2_loadhpd, sse2_storelpd, sse2_loadlpd): New.
        (sse2_movhpd): Remove.
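
The emmintrin.h change is transparent to users: the intrinsics keep
their double * / double const * interface, and only the __v2si * casts
into the builtins go away.  For completeness, the store half in use
(illustrative snippet, not a testcase from this patch):

#include <emmintrin.h>

/* Illustration only: split a V2DF into two scalars; these map onto
   the new sse2_storelpd/sse2_storehpd patterns.  */
void
split_pd (double *lo, double *hi, __m128d v)
{
  _mm_storel_pd (lo, v);   /* *lo = low element  (movlpd/movsd store) */
  _mm_storeh_pd (hi, v);   /* *hi = high element (movhpd store)       */
}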

Index: config/i386/emmintrin.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/emmintrin.h,v
retrieving revision 1.7
diff -u -p -u -r1.7 emmintrin.h
--- config/i386/emmintrin.h	20 Dec 2004 10:55:11 -0000	1.7
+++ config/i386/emmintrin.h	23 Dec 2004 07:44:19 -0000
@@ -937,25 +937,25 @@ _mm_unpacklo_pd (__m128d __A, __m128d __
 static __inline __m128d
 _mm_loadh_pd (__m128d __A, double const *__B)
 {
-  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
+  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
 }
 
 static __inline void
 _mm_storeh_pd (double *__A, __m128d __B)
 {
-  __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
+  __builtin_ia32_storehpd (__A, (__v2df)__B);
 }
 
 static __inline __m128d
 _mm_loadl_pd (__m128d __A, double const *__B)
 {
-  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
+  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
 }
 
 static __inline void
 _mm_storel_pd (double *__A, __m128d __B)
 {
-  __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
+  __builtin_ia32_storelpd (__A, (__v2df)__B);
 }
 
 static __inline int
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.761
diff -u -p -u -r1.761 i386.c
--- config/i386/i386.c	23 Dec 2004 03:49:01 -0000	1.761
+++ config/i386/i386.c	23 Dec 2004 07:44:20 -0000
@@ -12551,12 +12551,9 @@ ix86_init_mmx_sse_builtins (void)
 				V2DF_type_node, V2DF_type_node,
 				integer_type_node,
 				NULL_TREE);
-  tree v2df_ftype_v2df_pv2si
+  tree v2df_ftype_v2df_pcdouble
     = build_function_type_list (V2DF_type_node,
-				V2DF_type_node, pv2si_type_node, NULL_TREE);
-  tree void_ftype_pv2si_v2df
-    = build_function_type_list (void_type_node,
-				pv2si_type_node, V2DF_type_node, NULL_TREE);
+				V2DF_type_node, pcdouble_type_node, NULL_TREE);
   tree void_ftype_pdouble_v2df
     = build_function_type_list (void_type_node,
 				pdouble_type_node, V2DF_type_node, NULL_TREE);
@@ -12858,10 +12855,10 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
 
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
-  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
-  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
@@ -13405,8 +13402,8 @@ ix86_expand_builtin (tree exp, rtx targe
     case IX86_BUILTIN_LOADLPD:
       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
 	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
-	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
-	       : CODE_FOR_sse2_movsd);
+	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
+	       : CODE_FOR_sse2_loadlpd);
       arg0 = TREE_VALUE (arglist);
       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
@@ -13430,12 +13427,8 @@ ix86_expand_builtin (tree exp, rtx targe
 
     case IX86_BUILTIN_STOREHPS:
     case IX86_BUILTIN_STORELPS:
-    case IX86_BUILTIN_STOREHPD:
-    case IX86_BUILTIN_STORELPD:
       icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
-	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
-	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
-	       : CODE_FOR_sse2_movsd);
+	       : CODE_FOR_sse_movlps);
       arg0 = TREE_VALUE (arglist);
       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
@@ -13451,7 +13444,28 @@ ix86_expand_builtin (tree exp, rtx targe
       if (! pat)
 	return 0;
       emit_insn (pat);
-      return 0;
+      return const0_rtx;
+
+    case IX86_BUILTIN_STOREHPD:
+    case IX86_BUILTIN_STORELPD:
+      icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
+	       : CODE_FOR_sse2_storelpd);
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      mode0 = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+
+      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+	op1 = copy_to_mode_reg (mode1, op1);
+
+      pat = GEN_FCN (icode) (op0, op1);
+      if (! pat)
+	return 0;
+      emit_insn (pat);
+      return const0_rtx;
 
     case IX86_BUILTIN_MOVNTPS:
       return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
@@ -15189,24 +15203,29 @@ ix86_expand_vector_init (rtx target, rtx
 
   /* ... values where only first field is non-constant are best loaded
      from the pool and overwritten via move later.  */
-  if (!i)
+  if (i == 0)
     {
-      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
-				    GET_MODE_INNER (mode), 0);
-
-      op = force_reg (mode, op);
       XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+
       switch (GET_MODE (target))
 	{
-	  case V2DFmode:
-	    emit_insn (gen_sse2_movsd (target, target, op));
-	    break;
-	  case V4SFmode:
+	case V2DFmode:
+	  emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0)));
+	  break;
+
+	case V4SFmode:
+	  {
+	    /* ??? We can represent this better.  */
+	    rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
+				          GET_MODE_INNER (mode), 0);
+	    op = force_reg (mode, op);
 	    emit_insn (gen_sse_movss (target, target, op));
-	    break;
-	  default:
-	    break;
+	  }
+	  break;
+
+	default:
+	  break;
 	}
       return;
     }
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.590
diff -u -p -u -r1.590 i386.md
--- config/i386/i386.md	23 Dec 2004 07:23:31 -0000	1.590
+++ config/i386/i386.md	23 Dec 2004 07:44:21 -0000
@@ -4734,16 +4734,10 @@
   switch (INTVAL (operands[2]))
     {
     case 0:
-      emit_insn (gen_sse2_movsd (operands[0], operands[0],
-				 simplify_gen_subreg (V2DFmode, operands[1],
-						      DFmode, 0)));
+      emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1]));
       break;
     case 1:
-      {
-	rtx op1 = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
-
-	emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], op1));
-      }
+      emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1]));
       break;
     default:
       abort ();
@@ -4760,14 +4754,10 @@
   switch (INTVAL (operands[2]))
     {
     case 0:
-      emit_move_insn (operands[0], gen_lowpart (DFmode, operands[1]));
+      emit_insn (gen_sse2_storelpd (operands[0], operands[1]));
       break;
     case 1:
-      {
-	rtx dest = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
-
-	emit_insn (gen_sse2_unpckhpd (dest, operands[1], operands[1]));
-      }
+      emit_insn (gen_sse2_storehpd (operands[0], operands[1]));
       break;
     default:
       abort ();
@@ -23731,17 +23721,103 @@
   [(set_attr "type" "ssemov")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_movhpd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-	(vec_merge:V2DF
-	 (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
-	 (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
-	 (const_int 1)))]
-  "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
-  "movhpd\t{%2, %0|%0, %2}"
+;; Store the high double of the source vector into the double destination.
+(define_insn "sse2_storehpd"
+  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,Y,Y")
+	(vec_select:DF
+	  (match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o")
+	  (parallel [(const_int 1)])))]
+  "TARGET_SSE2"
+  "@
+   movhpd\t{%1, %0|%0, %1}
+   unpckhpd\t%0, %0
+   #"
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V2DF")])
 
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+	(vec_select:DF
+	  (match_operand:V2DF 1 "memory_operand" "")
+	  (parallel [(const_int 1)])))]
+  "TARGET_SSE2 && reload_completed"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8));
+  DONE;
+})
+
+;; Load the high double of the target vector from the source scalar.
+(define_insn "sse2_loadhpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,o")
+	(vec_concat:V2DF
+	  (vec_select:DF
+	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+	    (parallel [(const_int 0)]))
+	  (match_operand:DF 2 "nonimmediate_operand"     " m,Y,Y")))]
+  "TARGET_SSE2"
+  "@
+   movhpd\t{%2, %0|%0, %2}
+   unpcklpd\t{%2, %0|%0, %2}
+   #"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+(define_split
+  [(set (match_operand:V2DF 0 "memory_operand" "")
+	(vec_concat:V2DF
+	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+	  (match_operand:DF 1 "register_operand" "")))]
+  "TARGET_SSE2 && reload_completed"
+  [(const_int 0)]
+{
+  emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]);
+  DONE;
+})
+
+;; Store the low double of the source vector into the double destination.
+(define_expand "sse2_storelpd"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+	(vec_select:DF
+	  (match_operand:V2DF 1 "nonimmediate_operand" "")
+	  (parallel [(const_int 1)])))]
+  "TARGET_SSE2"
+{
+  operands[1] = gen_lowpart (DFmode, operands[1]);
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
+;; Load the low double of the target vector from the source scalar.
+(define_insn "sse2_loadlpd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,m")
+	(vec_concat:V2DF
+	  (match_operand:DF 2 "nonimmediate_operand"     " m,Y,Y")
+	  (vec_select:DF
+	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+	    (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "@
+   movlpd\t{%2, %0|%0, %2}
+   movsd\t{%2, %0|%0, %2}
+   movlpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; Merge the low part of the source vector into the low part of the target.
+(define_insn "sse2_movsd"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
+        (vec_merge:V2DF
+         (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
+         (match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y")
+         (const_int 2)))]
+  "TARGET_SSE2"
+  "@movsd\t{%2, %0|%0, %2}
+    movlpd\t{%2, %0|%0, %2}
+    movlpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF,V2DF,V2DF")])
+
 (define_expand "sse2_loadsd"
   [(match_operand:V2DF 0 "register_operand" "")
    (match_operand:DF 1 "memory_operand" "")]
@@ -23763,24 +23839,6 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "DF")])
 
-;; ??? We ought to be using ix86_binary_operator_ok on this pattern, so
-;; that we enforce the whole matching memory thing through combine et al.
-;; But that requires that things be set up properly when invoked via an
-;; intrinsic, which we don't do.  Which leads to instantiate virtual regs
-;; lossage, as seen compiling gcc.dg/i386-sse-2.c for x86_64 at -O0.
-(define_insn "sse2_movsd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
-	(vec_merge:V2DF
-	 (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
-	 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,x")
-	 (const_int 2)))]
-  "TARGET_SSE2"
-  "@movsd\t{%2, %0|%0, %2}
-    movlpd\t{%2, %0|%0, %2}
-    movlpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF,V2DF,V2DF")])
-
 (define_insn "sse2_storesd"
   [(set (match_operand:DF 0 "memory_operand" "=m")
 	(vec_select:DF

