re PR target/12902 (Invalid assembly generated when using SSE / xmmintrin.h)

author Richard Henderson <rth@gcc.gnu.org>

Wed, 5 Jan 2005 19:14:39 +0000 (11:14 -0800)

committer Richard Henderson <rth@gcc.gnu.org>

Wed, 5 Jan 2005 19:14:39 +0000 (11:14 -0800)
author Richard Henderson <rth@gcc.gnu.org>
Wed, 5 Jan 2005 19:14:39 +0000 (11:14 -0800)
committer Richard Henderson <rth@gcc.gnu.org>
Wed, 5 Jan 2005 19:14:39 +0000 (11:14 -0800)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 39da29dadc6361d372b999db448b959f27af3415..08b41f583f3d814aeb2f89d2ad435724d3ad188b 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,14 @@
-2004-01-05  Julian Brown  <julian@codesourcery.com>
+2005-01-05  Richard Henderson  <rth@redhat.com>
+
+       PR target/12902
+       * config/i386/i386.md (sse_movhps, sse_movlps): Remove.
+       (sse_shufps): Change operand 3 to const_int_operand.
+       (sse2_storelpd): Fix typo in template.
+       (sse_storehps, sse_loadhps, sse_storelps, sse_loadlps): New.
+       * config/i386/i386.c (ix86_expand_vector_move_misalign): Use them.
+       (ix86_expand_builtin): Likewise.
+
+2005-01-05  Julian Brown  <julian@codesourcery.com>
  
         * config/arm/arm.c (arm_return_in_memory): Treat complex types
         as aggregates for AAPCS ABIs.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index 23129a08943743b651186c306456893b76c56086..51d36f186266dc0729b1b2b174c4e2e50467a0d1 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1,6 +1,6 @@
  /* Subroutines used for code generation on IA-32.
     Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-   2002, 2003, 2004 Free Software Foundation, Inc.
+   2002, 2003, 2004, 2005 Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -7645,11 +7645,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
           else
             emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
  
-         op0 = gen_lowpart (V4SFmode, op0);
-         m = adjust_address (op1, V4SFmode, 0);
-         emit_insn (gen_sse_movlps (op0, op0, m));
-         m = adjust_address (op1, V4SFmode, 8);
-         emit_insn (gen_sse_movhps (op0, op0, m));
+         m = adjust_address (op1, V2SFmode, 0);
+         emit_insn (gen_sse_loadlps (op0, op0, m));
+         m = adjust_address (op1, V2SFmode, 8);
+         emit_insn (gen_sse_loadhps (op0, op0, m));
         }
      }
    else if (MEM_P (op0))
@@ -7684,11 +7683,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
         }
        else
         {
-         op1 = gen_lowpart (V4SFmode, op1);
-         m = adjust_address (op0, V4SFmode, 0);
-         emit_insn (gen_sse_movlps (m, m, op1));
-         m = adjust_address (op0, V4SFmode, 8);
-         emit_insn (gen_sse_movhps (m, m, op1));
+         m = adjust_address (op0, V2SFmode, 0);
+         emit_insn (gen_sse_storelps (m, op1));
+         m = adjust_address (op0, V2SFmode, 8);
+         emit_insn (gen_sse_storehps (m, op1));
           return;
         }
      }
@@ -13508,8 +13506,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
      case IX86_BUILTIN_LOADLPS:
      case IX86_BUILTIN_LOADHPD:
      case IX86_BUILTIN_LOADLPD:
-      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
-              : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
+      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
+              : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
                : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
                : CODE_FOR_sse2_loadlpd);
        arg0 = TREE_VALUE (arglist);
@@ -13535,28 +13533,11 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
  
      case IX86_BUILTIN_STOREHPS:
      case IX86_BUILTIN_STORELPS:
-      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
-              : CODE_FOR_sse_movlps);
-      arg0 = TREE_VALUE (arglist);
-      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-      mode0 = insn_data[icode].operand[1].mode;
-      mode1 = insn_data[icode].operand[2].mode;
-
-      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
-      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
-       op1 = copy_to_mode_reg (mode1, op1);
-
-      pat = GEN_FCN (icode) (op0, op0, op1);
-      if (! pat)
-       return 0;
-      emit_insn (pat);
-      return const0_rtx;
-
      case IX86_BUILTIN_STOREHPD:
      case IX86_BUILTIN_STORELPD:
-      icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
+      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
+              : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_storelps
+              : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
                : CODE_FOR_sse2_storelpd);
        arg0 = TREE_VALUE (arglist);
        arg1 = TREE_VALUE (TREE_CHAIN (arglist));
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

index eb2eee895fcd01e65035a3ce09c03720a078bf2a..7848579153efaf6d0209d5130cf9dbc487c22c7a 100644 (file)
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1,6 +1,6 @@
  ;; GCC machine description for IA-32 and x86-64.
  ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-;; 2001, 2002, 2003, 2004
+;; 2001, 2002, 2003, 2004, 2005
  ;; Free Software Foundation, Inc.
  ;; Mostly by William Schelter.
  ;; x86_64 support added by Jan Hubicka
@@ -20335,29 +20335,98 @@
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "V4SF")])
  
-(define_insn "sse_movhps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
-        (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
-        (const_int 12)))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
-  "movhps\t{%2, %0|%0, %2}"
+;; Store the high V2SF of the source vector to the destination.
+(define_insn "sse_storehps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+       (vec_select:V2SF
+         (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+         (parallel [(const_int 2) (const_int 3)])))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%1, %0|%0, %1}
+   movhlps\t{%1, %0|%0, %1}
+   #"
    [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "V2SF")])
  
-(define_insn "sse_movlps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
-        (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
-        (const_int 3)))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
-  "movlps\t{%2, %0|%0, %2}"
+(define_split
+  [(set (match_operand:V2SF 0 "register_operand" "")
+       (vec_select:V2SF
+         (match_operand:V4SF 1 "memory_operand" "")
+         (parallel [(const_int 2) (const_int 3)])))]
+  "TARGET_SSE && reload_completed"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[0], adjust_address (operands[1], V2SFmode, 8));
+  DONE;
+})
+
+;; Load the high V2SF of the target vector from the source vector.
+(define_insn "sse_loadhps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+       (vec_concat:V4SF
+         (vec_select:V2SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+           (parallel [(const_int 0) (const_int 1)]))
+         (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+  "TARGET_SSE"
+  "@
+   movhps\t{%2, %0|%0, %2}
+   movlhps\t{%2, %0|%0, %2}
+   #"
    [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "V2SF")])
+
+(define_split
+  [(set (match_operand:V4SF 0 "memory_operand" "")
+       (vec_concat:V4SF
+         (vec_select:V2SF
+           (match_dup 0)
+           (parallel [(const_int 0) (const_int 1)]))
+         (match_operand:V2SF 2 "register_operand" "")))]
+  "TARGET_SSE && reload_completed"
+  [(const_int 0)]
+{ /* Store the V2SF source (operand 2) into the high half of the memory.  */
+  emit_move_insn (adjust_address (operands[0], V2SFmode, 8), operands[2]);
+  DONE;
+})
+
+;; Store the low V2SF of the source vector to the destination.
+(define_expand "sse_storelps"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+       (vec_select:V2SF
+         (match_operand:V4SF 1 "nonimmediate_operand" "")
+         (parallel [(const_int 0) (const_int 1)])))]
+  "TARGET_SSE"
+{
+  operands[1] = gen_lowpart (V2SFmode, operands[1]);
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
+;; Load the low V2SF of the target vector from the source vector.
+(define_insn "sse_loadlps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+       (vec_concat:V4SF
+         (match_operand:V2SF 2 "nonimmediate_operand" "m,0,x")
+         (vec_select:V2SF
+           (match_operand:V4SF 1 "nonimmediate_operand" "0,x,0")
+           (parallel [(const_int 2) (const_int 3)]))))]
+  "TARGET_SSE"
+{
+  static const char * const alt[] = {
+    "movlps\t{%2, %0|%0, %2}",
+    "shufps\t{%2, %1, %0|%0, %1, %2}",
+    "movlps\t{%2, %0|%0, %2}"
+  };
+
+  if (which_alternative == 1)
+    operands[2] = GEN_INT (0xe4);
+
+  return alt[which_alternative];
+}
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2SF")])
  
  (define_expand "sse_loadss"
    [(match_operand:V4SF 0 "register_operand" "")
@@ -20405,10 +20474,9 @@
    [(set (match_operand:V4SF 0 "register_operand" "=x")
          (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                       (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-                     (match_operand:SI 3 "immediate_operand" "i")]
+                     (match_operand:SI 3 "const_int_operand" "n")]
                      UNSPEC_SHUFFLE))]
    "TARGET_SSE"
-  ;; @@@ check operand order for intel/nonintel syntax
    "shufps\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "type" "ssecvt")
     (set_attr "mode" "V4SF")])
@@ -23902,7 +23970,7 @@
    [(set (match_operand:DF 0 "nonimmediate_operand" "")
         (vec_select:DF
           (match_operand:V2DF 1 "nonimmediate_operand" "")
-         (parallel [(const_int 1)])))]
+         (parallel [(const_int 0)])))]
    "TARGET_SSE2"
  {
    operands[1] = gen_lowpart (DFmode, operands[1]);
@@ -23910,7 +23978,7 @@
    DONE;
  })
  
-;; Load the load double of the target vector from the source scalar.
+;; Load the low double of the target vector from the source scalar.
  (define_insn "sse2_loadlpd"
    [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,m")
         (vec_concat:V2DF
diff --git a/gcc/testsuite/gcc.target/i386/sse-1.c b/gcc/testsuite/gcc.target/i386/sse-1.c

new file mode 100644 (file)

index 0000000..afae22d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-1.c
@@ -0,0 +1,25 @@
+/* PR 12902 */
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse" } */
+
+#include <xmmintrin.h>
+
+typedef union
+{
+  int i[4];
+  float f[4];
+  __m128 v;
+} vector4_t;
+
+void
+swizzle (const void *a, vector4_t * b, vector4_t * c)
+{
+  b->v = _mm_loadl_pi (b->v, (__m64 *) a);
+  c->v = _mm_loadl_pi (c->v, ((__m64 *) a) + 1);
+}
+
+/* While one legal rendering of each statement would be movaps;movlps;movaps,
+   we can implement this with just movlps;movlps.  Since we do now, anything
+   less would be a regression.  */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler "movlps" } } */
author	Richard Henderson <rth@gcc.gnu.org>
	Wed, 5 Jan 2005 19:14:39 +0000 (11:14 -0800)
committer	Richard Henderson <rth@gcc.gnu.org>
	Wed, 5 Jan 2005 19:14:39 +0000 (11:14 -0800)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/i386/i386.c		patch \| blob \| blame \| history
gcc/config/i386/i386.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/i386/sse-1.c	[new file with mode: 0644]	patch \| blob