i386.c (ix86_expand_vector_move): Tidy.

author Richard Henderson <rth@redhat.com>

Thu, 23 Dec 2004 10:20:04 +0000 (02:20 -0800)

committer Richard Henderson <rth@gcc.gnu.org>

Thu, 23 Dec 2004 10:20:04 +0000 (02:20 -0800)
author Richard Henderson <rth@redhat.com>
Thu, 23 Dec 2004 10:20:04 +0000 (02:20 -0800)
committer Richard Henderson <rth@gcc.gnu.org>
Thu, 23 Dec 2004 10:20:04 +0000 (02:20 -0800)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 4f8f3a61f016cf06d6fbf7d11325d81ea3ac9923..1fa3c957c0496e4883b8a5a795e610f4bec9dc4b 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2004-12-23  Richard Henderson  <rth@redhat.com>
+
+       * config/i386/i386.c (ix86_expand_vector_move): Tidy.
+       (ix86_expand_vector_move_misalign): New.
+       (ix86_misaligned_mem_ok): Remove.
+       (TARGET_VECTORIZE_MISALIGNED_MEM_OK): Remove.
+       * config/i386/i386-protos.h: Update.
+       * config/i386/i386.md (SSEMODEI): Rename from SSEINT16.
+       (MMXMODEI): Rename from MMXINT8.
+       (SSEMODE, MMXMODE, movmisalign<mode>): New.
+
  2004-12-23  Mark Mitchell  <mark@codesourcery.com>
  
         PR c++/16405
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h

index 3ee9b2278c837b74a505de3510e92159f493ca10..58e4e23471b95fc32c58bfe15eefd14547a44f07 100644 (file)
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -125,6 +125,7 @@ extern void i386_output_dwarf_dtprel (FILE*, int, rtx);
  extern void ix86_expand_clear (rtx);
  extern void ix86_expand_move (enum machine_mode, rtx[]);
  extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
  extern void ix86_expand_binary_operator (enum rtx_code,
                                          enum machine_mode, rtx[]);
  extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index 2a9dca24d34017a8e7d494e54a3cfa18d73dfc17..fa6c3b4cbf18d06783485e547b8481de2e41c02b 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -867,7 +867,6 @@ static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
  static int ix86_issue_rate (void);
  static int ix86_adjust_cost (rtx, rtx, rtx, int);
  static int ia32_multipass_dfa_lookahead (void);
-static bool ix86_misaligned_mem_ok (enum machine_mode);
  static void ix86_init_mmx_sse_builtins (void);
  static rtx x86_this_parameter (tree);
  static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
@@ -1010,9 +1009,6 @@ static void init_ext_80387_constants (void);
  #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
    ia32_multipass_dfa_lookahead
  
-#undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
-#define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
-
  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
  #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
  
@@ -7556,28 +7552,149 @@ ix86_expand_move (enum machine_mode mode, rtx operands[])
  void
  ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
  {
+  rtx op0 = operands[0], op1 = operands[1];
+
    /* Force constants other than zero into memory.  We do not know how
       the instructions used to build constants modify the upper 64 bits
       of the register, once we have that information we may be able
       to handle some of them more efficiently.  */
    if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], mode)
-      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
-    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
+      && register_operand (op0, mode)
+      && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
+    op1 = validize_mem (force_const_mem (mode, op1));
  
    /* Make operand1 a register if it isn't already.  */
    if (!no_new_pseudos
-      && !register_operand (operands[0], mode)
-      && !register_operand (operands[1], mode))
+      && !register_operand (op0, mode)
+      && !register_operand (op1, mode))
      {
-      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
-      emit_move_insn (operands[0], temp);
+      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
        return;
      }
  
-  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
+   straight to ix86_expand_vector_move.
+
+   OPERANDS[0] is the destination and OPERANDS[1] the source of a
+   vector move in MODE.  A MEM source is expanded as a load (this is
+   tested first); otherwise a MEM destination is expanded as a store;
+   if neither operand is a MEM we hit gcc_unreachable.
+
+   Both directions try, in order:
+     - when optimizing for size, a single sse_movups on the V4SFmode
+       view of the operands;
+     - with SSE2, a single sse2_movdqu on the V16QImode view for
+       integer vector modes (for stores, only when
+       !TARGET_SSE_TYPELESS_STORES);
+     - with SSE2 and V2DFmode, two DFmode accesses at offsets 0 and 8;
+     - otherwise sse_movlps followed by sse_movhps on the V4SFmode
+       view, using memory references at offsets 0 and 8.  For loads
+       this last case first zeroes the destination when
+       TARGET_SSE_PARTIAL_REG_DEPENDENCY is set, and emits a CLOBBER
+       of it otherwise.  */
+
+void
+ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
+{
+  rtx op0, op1, m;
+
+  op0 = operands[0];
+  op1 = operands[1];
+
+  if (MEM_P (op1))
+    {
+      /* If we're optimizing for size, movups is the smallest.  */
+      if (optimize_size)
+       {
+         op0 = gen_lowpart (V4SFmode, op0);
+         op1 = gen_lowpart (V4SFmode, op1);
+         emit_insn (gen_sse_movups (op0, op1));
+         return;
+       }
+
+      /* ??? If we have typed data, then it would appear that using
+        movdqu is the only way to get unaligned data loaded with
+        integer type.  */
+      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+       {
+         op0 = gen_lowpart (V16QImode, op0);
+         op1 = gen_lowpart (V16QImode, op1);
+         emit_insn (gen_sse2_movdqu (op0, op1));
+         return;
+       }
+
+      if (TARGET_SSE2 && mode == V2DFmode)
+       {
+         /* When SSE registers are split into halves, we can avoid
+            writing to the top half twice.  */
+         if (TARGET_SSE_SPLIT_REGS)
+           {
+             emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+             m = adjust_address (op1, DFmode, 0);
+             emit_insn (gen_sse2_loadlpd (op0, op0, m));
+             m = adjust_address (op1, DFmode, 8);
+             emit_insn (gen_sse2_loadhpd (op0, op0, m));
+           }
+         else
+           {
+             /* ??? Not sure about the best option for the Intel chips.
+                The following would seem to satisfy; the register is
+                entirely cleared, breaking the dependency chain.  We
+                then store to the upper half, with a dependency depth
+                of one.  A rumor has it that Intel recommends two movsd
+                followed by an unpacklpd, but this is unconfirmed.  And
+                given that the dependency depth of the unpacklpd would
+                still be one, I'm not sure why this would be better.  */
+             m = adjust_address (op1, DFmode, 0);
+             emit_insn (gen_sse2_loadsd (op0, m));
+             m = adjust_address (op1, DFmode, 8);
+             emit_insn (gen_sse2_loadhpd (op0, op0, m));
+           }
+       }
+      else
+       {
+         if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+           emit_move_insn (op0, CONST0_RTX (mode));
+         else
+           emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+
+         op0 = gen_lowpart (V4SFmode, op0);
+         m = adjust_address (op1, V4SFmode, 0);
+         emit_insn (gen_sse_movlps (op0, op0, m));
+         m = adjust_address (op1, V4SFmode, 8);
+         emit_insn (gen_sse_movhps (op0, op0, m));
+       }
+    }
+  else if (MEM_P (op0))
+    {
+      /* If we're optimizing for size, movups is the smallest.  */
+      if (optimize_size)
+       {
+         op0 = gen_lowpart (V4SFmode, op0);
+         op1 = gen_lowpart (V4SFmode, op1);
+         emit_insn (gen_sse_movups (op0, op1));
+         return;
+       }
+
+      /* ??? Similar to above, only less clear because of "typeless
+        stores": when TARGET_SSE_TYPELESS_STORES is set we skip movdqu
+        and fall through to the mode-specific stores below.  */
+      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
+         && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+        {
+         op0 = gen_lowpart (V16QImode, op0);
+         op1 = gen_lowpart (V16QImode, op1);
+         emit_insn (gen_sse2_movdqu (op0, op1));
+         return;
+       }
+
+      if (TARGET_SSE2 && mode == V2DFmode)
+       {
+         m = adjust_address (op0, DFmode, 0);
+         emit_insn (gen_sse2_storelpd (m, op1));
+         m = adjust_address (op0, DFmode, 8);
+         emit_insn (gen_sse2_storehpd (m, op1));
+         return;
+       }
+      else
+       {
+         op1 = gen_lowpart (V4SFmode, op1);
+         m = adjust_address (op0, V4SFmode, 0);
+         emit_insn (gen_sse_movlps (m, m, op1));
+         m = adjust_address (op0, V4SFmode, 8);
+         emit_insn (gen_sse_movhps (m, m, op1));
+         return;
+       }
+    }
+  else
+    gcc_unreachable ();
 }
  
+
  /* Attempt to expand a binary operator.  Make the expansion closer to the
     actual machine, then just general_operand, which will allow 3 separate
     memory references (one output, two input) in a single insn.  */
@@ -11727,17 +11844,6 @@ ia32_multipass_dfa_lookahead (void)
  }
  
  \f
-/* Implement the target hook targetm.vectorize.misaligned_mem_ok.  */
-
-static bool
-ix86_misaligned_mem_ok (enum machine_mode mode)
-{
-  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
-    return true;
-  else
-    return false;
-}
-
  /* Compute the alignment given to a constant that is being placed in memory.
     EXP is the constant and ALIGN is the alignment that the object would
     ordinarily have.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md

index ff0f9f9dd8442ee7c9bfc86db656a896569440b4..17835c7e0df0b528608f95d1e4c631ec964b7c2b 100644 (file)
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -19789,11 +19789,11 @@
  
  ;; 16 byte integral modes handled by SSE, minus TImode, which gets
  ;; special-cased for TARGET_64BIT.
-(define_mode_macro SSEINT16 [V16QI V8HI V4SI V2DI])
+(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
  
  (define_expand "mov<mode>"
-  [(set (match_operand:SSEINT16 0 "nonimmediate_operand" "")
-       (match_operand:SSEINT16 1 "nonimmediate_operand" ""))]
+  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
+       (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
    "TARGET_SSE"
  {
    ix86_expand_vector_move (<MODE>mode, operands);
@@ -19801,8 +19801,8 @@
  })
  
  (define_insn "*mov<mode>_internal"
-  [(set (match_operand:SSEINT16 0 "nonimmediate_operand" "=x,x ,m")
-       (match_operand:SSEINT16 1 "vector_move_operand"  "C ,xm,x"))]
+  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
+       (match_operand:SSEMODEI 1 "vector_move_operand"  "C ,xm,x"))]
    "TARGET_SSE
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  {
@@ -19842,11 +19842,11 @@
                (const_string "TI")))])
  
  ;; 8 byte integral modes handled by MMX (and by extension, SSE)
-(define_mode_macro MMXINT8 [V8QI V4HI V2SI])
+(define_mode_macro MMXMODEI [V8QI V4HI V2SI])
  
  (define_expand "mov<mode>"
-  [(set (match_operand:MMXINT8 0 "nonimmediate_operand" "")
-       (match_operand:MMXINT8 1 "nonimmediate_operand" ""))]
+  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
+       (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
    "TARGET_MMX"
  {
    ix86_expand_vector_move (<MODE>mode, operands);
@@ -19854,9 +19854,9 @@
  })
  
  (define_insn "*mov<mode>_internal"
-  [(set (match_operand:MMXINT8 0 "nonimmediate_operand"
+  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
                                         "=y,y ,m,!y,!*Y,*x,?*x,?m")
-       (match_operand:MMXINT8 1 "vector_move_operand"
+       (match_operand:MMXMODEI 1 "vector_move_operand"
                                         "C ,ym,y,*Y,y  ,C ,*xm,*x"))]
    "TARGET_MMX
     && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
@@ -20103,6 +20103,30 @@
    [(const_int 0)]
    "ix86_split_long_move (operands); DONE;")
  
+;; All 16-byte vector modes handled by SSE
+(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
+       (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_move_misalign (<MODE>mode, operands);
+  DONE;
+})
+
+;; All 8-byte vector modes handled by MMX
+(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+       (match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
+  "TARGET_MMX"
+{
+  ix86_expand_vector_move (<MODE>mode, operands);
+  DONE;
+})
+
  ;; These two patterns are useful for specifying exactly whether to use
  ;; movaps or movups
  (define_expand "sse_movaps"
author	Richard Henderson <rth@redhat.com>
	Thu, 23 Dec 2004 10:20:04 +0000 (02:20 -0800)
committer	Richard Henderson <rth@gcc.gnu.org>
	Thu, 23 Dec 2004 10:20:04 +0000 (02:20 -0800)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/i386/i386-protos.h		patch \| blob \| blame \| history
gcc/config/i386/i386.c		patch \| blob \| blame \| history
gcc/config/i386/i386.md		patch \| blob \| blame \| history