This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: PR 12902: Still problems with unaligned SSE access (V4SF mode)


On Wed, Jan 05, 2005 at 01:31:04PM +0100, Uros Bizjak wrote:
> +;; Constraint should reject both input operands from memory. Otherwise
> +;; reload pass can be confused to make a secondary reload from unaligned
> +;; memory address.

No, that's not really the problem.  The problem is that we lied about
what mode the source has.  It's a V2SF, and we're claiming that it's
a V4SF.  At which point it's hardly surprising that the wrong sort of
value gets reloaded.

Here's the patch I'm testing, for comparison.


r~



Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.767
diff -u -p -c -r1.767 i386.c
*** config/i386/i386.c	3 Jan 2005 06:26:49 -0000	1.767
--- config/i386/i386.c	5 Jan 2005 12:35:37 -0000
*************** ix86_expand_vector_move_misalign (enum m
*** 7645,7655 ****
  	  else
  	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
  
! 	  op0 = gen_lowpart (V4SFmode, op0);
! 	  m = adjust_address (op1, V4SFmode, 0);
! 	  emit_insn (gen_sse_movlps (op0, op0, m));
! 	  m = adjust_address (op1, V4SFmode, 8);
! 	  emit_insn (gen_sse_movhps (op0, op0, m));
  	}
      }
    else if (MEM_P (op0))
--- 7645,7654 ----
  	  else
  	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
  
! 	  m = adjust_address (op1, V2SFmode, 0);
! 	  emit_insn (gen_sse_loadlps (op0, op0, m));
! 	  m = adjust_address (op1, V2SFmode, 8);
! 	  emit_insn (gen_sse_loadhps (op0, op0, m));
  	}
      }
    else if (MEM_P (op0))
*************** ix86_expand_vector_move_misalign (enum m
*** 7684,7694 ****
  	}
        else
  	{
! 	  op1 = gen_lowpart (V4SFmode, op1);
! 	  m = adjust_address (op0, V4SFmode, 0);
! 	  emit_insn (gen_sse_movlps (m, m, op1));
! 	  m = adjust_address (op0, V4SFmode, 8);
! 	  emit_insn (gen_sse_movhps (m, m, op1));
  	  return;
  	}
      }
--- 7683,7692 ----
  	}
        else
  	{
! 	  m = adjust_address (op0, V2SFmode, 0);
! 	  emit_insn (gen_sse_storelps (m, op1));
! 	  m = adjust_address (op0, V2SFmode, 8);
! 	  emit_insn (gen_sse_storehps (m, op1));
  	  return;
  	}
      }
*************** ix86_expand_builtin (tree exp, rtx targe
*** 13508,13515 ****
      case IX86_BUILTIN_LOADLPS:
      case IX86_BUILTIN_LOADHPD:
      case IX86_BUILTIN_LOADLPD:
!       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
! 	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
  	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
  	       : CODE_FOR_sse2_loadlpd);
        arg0 = TREE_VALUE (arglist);
--- 13506,13513 ----
      case IX86_BUILTIN_LOADLPS:
      case IX86_BUILTIN_LOADHPD:
      case IX86_BUILTIN_LOADLPD:
!       icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
! 	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
  	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
  	       : CODE_FOR_sse2_loadlpd);
        arg0 = TREE_VALUE (arglist);
*************** ix86_expand_builtin (tree exp, rtx targe
*** 13535,13562 ****
  
      case IX86_BUILTIN_STOREHPS:
      case IX86_BUILTIN_STORELPS:
-       icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
- 	       : CODE_FOR_sse_movlps);
-       arg0 = TREE_VALUE (arglist);
-       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-       op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-       mode0 = insn_data[icode].operand[1].mode;
-       mode1 = insn_data[icode].operand[2].mode;
- 
-       op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
-       if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- 	op1 = copy_to_mode_reg (mode1, op1);
- 
-       pat = GEN_FCN (icode) (op0, op0, op1);
-       if (! pat)
- 	return 0;
-       emit_insn (pat);
-       return const0_rtx;
- 
      case IX86_BUILTIN_STOREHPD:
      case IX86_BUILTIN_STORELPD:
!       icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
  	       : CODE_FOR_sse2_storelpd);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
--- 13533,13543 ----
  
      case IX86_BUILTIN_STOREHPS:
      case IX86_BUILTIN_STORELPS:
      case IX86_BUILTIN_STOREHPD:
      case IX86_BUILTIN_STORELPD:
!       icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
! 	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_storelps
! 	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
  	       : CODE_FOR_sse2_storelpd);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v
retrieving revision 1.599
diff -u -p -c -r1.599 i386.md
*** config/i386/i386.md	4 Jan 2005 10:40:52 -0000	1.599
--- config/i386/i386.md	5 Jan 2005 12:35:38 -0000
***************
*** 20335,20363 ****
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "V4SF")])
  
! (define_insn "sse_movhps"
!   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(vec_merge:V4SF
! 	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
! 	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
! 	 (const_int 12)))]
!   "TARGET_SSE
!    && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
!   "movhps\t{%2, %0|%0, %2}"
    [(set_attr "type" "ssecvt")
!    (set_attr "mode" "V4SF")])
  
! (define_insn "sse_movlps"
!   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
! 	(vec_merge:V4SF
! 	 (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
! 	 (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
! 	 (const_int 3)))]
!   "TARGET_SSE
!    && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
!   "movlps\t{%2, %0|%0, %2}"
    [(set_attr "type" "ssecvt")
!    (set_attr "mode" "V4SF")])
  
  (define_expand "sse_loadss"
    [(match_operand:V4SF 0 "register_operand" "")
--- 20335,20432 ----
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "V4SF")])
  
! ;; Store the high V2SF of the source vector to the destination.
! (define_insn "sse_storehps"
!   [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
! 	(vec_select:V2SF
! 	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
! 	  (parallel [(const_int 2) (const_int 3)])))]
!   "TARGET_SSE"
!   "@
!    movhps\t{%1, %0|%0, %1}
!    movhlps\t{%1, %0|%0, %1}
!    #"
    [(set_attr "type" "ssecvt")
!    (set_attr "mode" "V2SF")])
  
! (define_split
!   [(set (match_operand:V2SF 0 "register_operand" "")
! 	(vec_select:V2SF
! 	  (match_operand:V4SF 1 "memory_operand" "")
! 	  (parallel [(const_int 2) (const_int 3)])))]
!   "TARGET_SSE && reload_completed"
!   [(const_int 0)]
! {
!   emit_move_insn (operands[0], adjust_address (operands[1], V2SFmode, 8));
!   DONE;
! })
! 
! ;; Load the high V2SF of the target vector from the source vector.
! (define_insn "sse_loadhps"
!   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
! 	(vec_concat:V4SF
! 	  (vec_select:V2SF
! 	    (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
! 	    (parallel [(const_int 0) (const_int 1)]))
! 	  (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
!   "TARGET_SSE"
!   "@
!    movhps\t{%2, %0|%0, %2}
!    movlhps\t{%2, %0|%0, %2}
!    #"
    [(set_attr "type" "ssecvt")
!    (set_attr "mode" "V2SF")])
! 
! (define_split
!   [(set (match_operand:V4SF 0 "memory_operand" "")
! 	(vec_concat:V4SF
! 	  (vec_select:V2SF
! 	    (match_dup 0)
! 	    (parallel [(const_int 0) (const_int 1)]))
! 	  (match_operand:V2SF 2 "register_operand" "")))]
!   "TARGET_SSE && reload_completed"
!   [(const_int 0)]
! { /* The V2SF source is operand 2; this pattern defines no operand 1.  */
!   emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[2]);
!   DONE;
! })
! 
! ;; Store the low V2SF of the source vector to the destination.
! (define_expand "sse_storelps"
!   [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
! 	(vec_select:V2SF
! 	  (match_operand:V4SF 1 "nonimmediate_operand" "")
! 	  (parallel [(const_int 0) (const_int 1)])))]
!   "TARGET_SSE"
! {
!   operands[1] = gen_lowpart (V2SFmode, operands[1]);
!   emit_move_insn (operands[0], operands[1]);
!   DONE;
! })
! 
! ;; Load the low V2SF of the target vector from the source vector.
! (define_insn "sse_loadlps"
!   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
! 	(vec_concat:V4SF
! 	  (match_operand:V2SF 2 "nonimmediate_operand" "m,0,x")
! 	  (vec_select:V2SF
! 	    (match_operand:V4SF 1 "nonimmediate_operand" "0,x,0")
! 	    (parallel [(const_int 2) (const_int 3)]))))]
!   "TARGET_SSE"
! {
!   static const char * const alt[] = {
!     "movlps\t{%2, %0|%0, %2}",
!     "shufps\t{%2, %1, %0|%0, %1, %2}",
!     "movlps\t{%2, %0|%0, %2}"
!   };
! 
!   if (which_alternative == 1)
!     operands[2] = GEN_INT (0xe4);
! 
!   return alt[which_alternative];
! }
!   [(set_attr "type" "ssecvt")
!    (set_attr "mode" "V2SF")])
  
  (define_expand "sse_loadss"
    [(match_operand:V4SF 0 "register_operand" "")
***************
*** 20405,20414 ****
    [(set (match_operand:V4SF 0 "register_operand" "=x")
          (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
  		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
! 		      (match_operand:SI 3 "immediate_operand" "i")]
  		     UNSPEC_SHUFFLE))]
    "TARGET_SSE"
-   ;; @@@ check operand order for intel/nonintel syntax
    "shufps\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "V4SF")])
--- 20474,20482 ----
    [(set (match_operand:V4SF 0 "register_operand" "=x")
          (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
  		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
! 		      (match_operand:SI 3 "const_int_operand" "n")]
  		     UNSPEC_SHUFFLE))]
    "TARGET_SSE"
    "shufps\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "V4SF")])
***************
*** 23902,23908 ****
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
  	(vec_select:DF
  	  (match_operand:V2DF 1 "nonimmediate_operand" "")
! 	  (parallel [(const_int 1)])))]
    "TARGET_SSE2"
  {
    operands[1] = gen_lowpart (DFmode, operands[1]);
--- 23970,23976 ----
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
  	(vec_select:DF
  	  (match_operand:V2DF 1 "nonimmediate_operand" "")
! 	  (parallel [(const_int 0)])))]
    "TARGET_SSE2"
  {
    operands[1] = gen_lowpart (DFmode, operands[1]);
***************
*** 23910,23916 ****
    DONE;
  })
  
! ;; Load the load double of the target vector from the source scalar.
  (define_insn "sse2_loadlpd"
    [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,m")
  	(vec_concat:V2DF
--- 23978,23984 ----
    DONE;
  })
  
! ;; Load the low double of the target vector from the source scalar.
  (define_insn "sse2_loadlpd"
    [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,m")
  	(vec_concat:V2DF
Index: testsuite/gcc.dg/i386-sse-11.c
===================================================================
RCS file: testsuite/gcc.dg/i386-sse-11.c
diff -N testsuite/gcc.dg/i386-sse-11.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- testsuite/gcc.dg/i386-sse-11.c	5 Jan 2005 12:35:41 -0000
***************
*** 0 ****
--- 1,52 ----
+ /* PR 12902 */
+ /* { dg-do run { target i?86-*-* x86_64-*-* } } */
+ /* { dg-options "-O1 -msse" } */
+ 
+ 
+ #include <xmmintrin.h>
+ #include "i386-cpuid.h"
+ 
+ 
+ typedef struct
+ {
+   int i;
+   float f[3];
+ } a_t;
+ 
+ typedef union
+ {
+   int i[4];
+   float f[4];
+   __m128 v;
+ } vector4_t;
+ 
+ void
+ swizzle (const void *a, vector4_t * b, vector4_t * c)
+ {
+   b->v = _mm_loadl_pi (b->v, (__m64 *) a);
+   c->v = _mm_loadl_pi (c->v, ((__m64 *) a) + 1);
+ }
+ 
+ int __attribute__((noinline))
+ main1 ()
+ {
+   a_t a[2];
+   vector4_t b, c, x;
+ 
+   swizzle (a, &b, &c);
+   x.v = _mm_add_ps (b.v, c.v);
+   /* vector4_t.i has four elements, so the valid lanes are 0..3.  */
+   return (x.i[0] + x.i[1] + x.i[2] + x.i[3]);
+ }
+ 
+ int
+ main ()
+ {
+   unsigned long cpu_facilities;
+ 
+   cpu_facilities = i386_cpuid ();
+   if (cpu_facilities & bit_SSE)
+     return main1 (); 
+   else
+     return 0;
+ }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]