This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PR 12902: Still problems with unaligned SSE access (V4SF mode)


Richard Henderson wrote:

On Wed, Jan 05, 2005 at 10:23:22AM +0100, Uros Bizjak wrote:


There are still problems with unaligned SSE access. It looks like the "sse_movhps" and "sse_movlps" patterns should be split into separate load and store patterns, as is now the case with sse2_loadhpd/sse2_storehpd and sse2_loadlpd/sse2_storelpd.



You're right. Testing a patch.


This is my approach to fixing this:

2005-01-04 Uros Bizjak <uros@kss-loka.si>

   PR target/12902
   * config/i386/i386.c (ix86_expand_builtin) [IX86_BUILTIN_LOADHPS,
   IX86_BUILTIN_LOADLPS]: Unconditionally copy op0 to register.
   [IX86_BUILTIN_STOREHPS, IX86_BUILTIN_STORELPS]: Unconditionally
   copy op1 to register.
   * config/i386/i386.md (sse_movhps, sse_movlps): Prevent both
   operands in memory or both operands in registers.

The patch is bootstrapped and regression-tested for C and C++, and it fixes PR 12902.

Uros.
Index: i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.767
diff -u -p -r1.767 i386.c
--- i386.c	3 Jan 2005 06:26:49 -0000	1.767
+++ i386.c	5 Jan 2005 12:28:35 -0000
@@ -13506,12 +13506,8 @@ ix86_expand_builtin (tree exp, rtx targe
 
     case IX86_BUILTIN_LOADHPS:
     case IX86_BUILTIN_LOADLPS:
-    case IX86_BUILTIN_LOADHPD:
-    case IX86_BUILTIN_LOADLPD:
       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
-	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
-	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
-	       : CODE_FOR_sse2_loadlpd);
+	       : CODE_FOR_sse_movlps);
       arg0 = TREE_VALUE (arglist);
       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
@@ -13520,9 +13516,9 @@ ix86_expand_builtin (tree exp, rtx targe
       mode0 = insn_data[icode].operand[1].mode;
       mode1 = insn_data[icode].operand[2].mode;
 
-      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
-	op0 = copy_to_mode_reg (mode0, op0);
+      op0 = copy_to_mode_reg (mode0, op0);
       op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
+
       if (target == 0
 	  || GET_MODE (target) != tmode
 	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
@@ -13545,8 +13541,7 @@ ix86_expand_builtin (tree exp, rtx targe
       mode1 = insn_data[icode].operand[2].mode;
 
       op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
-      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
-	op1 = copy_to_mode_reg (mode1, op1);
+      op1 = copy_to_mode_reg (mode1, op1);
 
       pat = GEN_FCN (icode) (op0, op0, op1);
       if (! pat)
@@ -13554,6 +13549,31 @@ ix86_expand_builtin (tree exp, rtx targe
       emit_insn (pat);
       return const0_rtx;
 
+    case IX86_BUILTIN_LOADHPD:
+    case IX86_BUILTIN_LOADLPD:
+      icode = (fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
+	       : CODE_FOR_sse2_loadlpd);
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode0 = insn_data[icode].operand[1].mode;
+      mode1 = insn_data[icode].operand[2].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+	op0 = copy_to_mode_reg (mode0, op0);
+      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
+      if (target == 0
+	  || GET_MODE (target) != tmode
+	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+	target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (! pat)
+	return 0;
+      emit_insn (pat);
+      return target;
+
     case IX86_BUILTIN_STOREHPD:
     case IX86_BUILTIN_STORELPD:
       icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
Index: i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.599
diff -u -p -r1.599 i386.md
--- i386.md	4 Jan 2005 10:40:52 -0000	1.599
+++ i386.md	5 Jan 2005 12:28:40 -0000
@@ -20335,6 +20335,9 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V4SF")])
 
+;; Constraint should reject both input operands from memory. Otherwise
+;; reload pass can be confused to make a secondary reload from unaligned
+;; memory address.
 (define_insn "sse_movhps"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
 	(vec_merge:V4SF
@@ -20342,11 +20345,14 @@
 	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
 	 (const_int 12)))]
   "TARGET_SSE
-   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+   && ((GET_CODE (operands[1]) == MEM) != (GET_CODE (operands[2]) == MEM))"
   "movhps\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V4SF")])
 
+;; Constraint should reject both input operands from memory. Otherwise
+;; reload pass can be confused to make a secondary reload to unaligned
+;; memory address.
 (define_insn "sse_movlps"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
 	(vec_merge:V4SF
@@ -20354,7 +20360,7 @@
 	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
 	 (const_int 3)))]
   "TARGET_SSE
-   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+   && ((GET_CODE (operands[1]) == MEM) != (GET_CODE (operands[2]) == MEM))"
   "movlps\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V4SF")])

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]