This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Vector permutation support for x86


On Wed, Dec 2, 2009 at 16:23, Richard Henderson <rth@redhat.com> wrote:
>> @@ -1724,8 +1723,8 @@
>>          (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
>>         (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
>>   "TARGET_FMA4
>> -   && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)
>> -   && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)
>> +   && MEM_P (operands[2])
>> +   && (MEM_P (operands[1]) || MEM_P (operands[3]))
>>    && !reg_mentioned_p (operands[0], operands[1])
>>    && !reg_mentioned_p (operands[0], operands[2])
>>    && !reg_mentioned_p (operands[0], operands[3])"
>
> This is the splitter under "fma4_fmadd<mode>4256", but the same comment
> applies to all of the fma4 splitters.
>
> First, MEM_P(operands[2]) would be better written as "memory_operand" in the
> match_operand for op2.  Second, two of the reg_mentioned_p tests are
> *always* going to be false for these patterns, for the simple reason that
> operands[0] is a vector float register and the only registers that would be
> present in a memory operand are address registers.
>
> So I think these splitters would be better written:
>
> ;; Split fmadd with two memory operands into a load and the fmadd.
> (define_split
>  [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
>        (plus:FMA4MODEF4
>         (mult:FMA4MODEF4
>          (match_operand:FMA4MODEF4 1 "register_operand" "")
>          (match_operand:FMA4MODEF4 2 "memory_operand" ""))
>         (match_operand:FMA4MODEF4 3 "memory_operand" "")))]
>  "TARGET_FMA4"
>  [(set (match_dup 0)
>        (plus:FMA4MODEF4
>          (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))
>          (match_dup 3)))]
> {
>  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
>    FAIL;
> })
>
> bool
> ix86_expand_fma4_multiple_memory (rtx operands[],
>                                  enum machine_mode mode)
> {
>  rtx scratch = operands[0];
>
>  gcc_assert (register_operand (operands[0], mode));
>  gcc_assert (register_operand (operands[1], mode));
>  gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3]));
>
>  if (reg_mentioned_p (scratch, operands[1]))
>    {
>      if (!can_create_pseudo_p ())
>        return false;
>      scratch = gen_reg_rtx (mode);
>    }
>
>  emit_move_insn (scratch, operands[3]);
>  if (rtx_equal_p (operands[2], operands[3]))
>    operands[2] = operands[3] = scratch;
>  else
>    operands[3] = scratch;
>  return true;
> }
>

Fixed like this.

Sebastian
From e79de1050d77d66bdf314542d474289c855f08e8 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Thu, 3 Dec 2009 12:43:48 -0600
Subject: [PATCH] Fix FMA4 and XOP splitters.

---
 gcc/config/i386/i386-protos.h |    2 +-
 gcc/config/i386/i386.c        |   35 +++----
 gcc/config/i386/sse.md        |  236 +++++++++++++++++-----------------------
 3 files changed, 116 insertions(+), 157 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27fca86..cf29cc7 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -218,7 +218,7 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int);
 extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
 extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
-extern void ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
+extern bool ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
 
 extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a0a2001..9b2829b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28813,34 +28813,29 @@ ix86_expand_round (rtx operand0, rtx operand1)
    memory operations.  Presently this is used by the multiply/add routines to
    allow 2 memory references.  */
 
-void
+bool
 ix86_expand_fma4_multiple_memory (rtx operands[],
 				  enum machine_mode mode)
 {
-  rtx op0 = operands[0];
+  rtx scratch = operands[0];
 
-  if (memory_operand (op0, mode)
-      || reg_mentioned_p (op0, operands[1])
-      || reg_mentioned_p (op0, operands[2])
-      || reg_mentioned_p (op0, operands[3]))
-    gcc_unreachable ();
+  gcc_assert (register_operand (operands[0], mode));
+  gcc_assert (register_operand (operands[1], mode));
+  gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3]));
 
-  /* For 2 memory operands, pick either operands[1] or operands[3] to move into
-     the destination register.  */
-  if (memory_operand (operands[1], mode))
+  if (reg_mentioned_p (scratch, operands[1]))
     {
-      emit_move_insn (op0, operands[1]);
-      operands[1] = op0;
-    }
-  else if (memory_operand (operands[3], mode))
-    {
-      emit_move_insn (op0, operands[3]);
-      operands[3] = op0;
+      if (!can_create_pseudo_p ())
+	return false;
+      scratch = gen_reg_rtx (mode);
     }
-  else
-    gcc_unreachable ();
 
-  return;
+  emit_move_insn (scratch, operands[3]);
+  if (rtx_equal_p (operands[2], operands[3]))
+    operands[2] = operands[3] = scratch;
+  else
+    operands[3] = scratch;
+  return true;
 }
 
 /* Table of valid machine attributes.  */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7bb4802..2c9a6c8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1719,21 +1719,17 @@
   [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
 	(plus:FMA4MODEF4
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:FMA4MODEF4 1 "register_operand" "")
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))
+	 (match_operand:FMA4MODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (plus:FMA4MODEF4
+         (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmadd<mode>4256 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+ if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+   FAIL;
 })
 
 ;; Floating multiply and subtract
@@ -1755,21 +1751,17 @@
   [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
 	(minus:FMA4MODEF4
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:FMA4MODEF4 1 "register_operand" "")
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))
+	 (match_operand:FMA4MODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:FMA4MODEF4
+         (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmsub<mode>4256 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+ if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+   FAIL;
 })
 
 ;; Floating point negative multiply and add
@@ -1792,22 +1784,18 @@
 (define_split
   [(set (match_operand:FMA4MODEF4 0 "register_operand" "")
 	(minus:FMA4MODEF4
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")
+	 (match_operand:FMA4MODEF4 3 "register_operand" "")
 	 (mult:FMA4MODEF4
-	  (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:FMA4MODEF4 1 "memory_operand" "")
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:FMA4MODEF4
+	 (match_dup 3)
+         (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmadd<mode>4256 (operands[0], operands[1],
-				     operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; Floating point negative multiply and subtract
@@ -1832,21 +1820,19 @@
 	(minus:FMA4MODEF4
 	 (mult:FMA4MODEF4
 	  (neg:FMA4MODEF4
-	   (match_operand:FMA4MODEF4 1 "nonimmediate_operand" ""))
-	  (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	   (match_operand:FMA4MODEF4 1 "register_operand" ""))
+	  (match_operand:FMA4MODEF4 2 "memory_operand" ""))
+	 (match_operand:FMA4MODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:FMA4MODEF4
+         (mult:FMA4MODEF4
+	  (neg:FMA4MODEF4 (match_dup 1))
+	  (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmsub<mode>4256 (operands[0], operands[1],
-				        operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1867,21 +1853,17 @@
   [(set (match_operand:SSEMODEF4 0 "register_operand" "")
 	(plus:SSEMODEF4
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:SSEMODEF4 1 "register_operand" "")
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))
+	 (match_operand:SSEMODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (plus:SSEMODEF4
+         (mult:SSEMODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmadd<mode>4 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -1921,21 +1903,17 @@
   [(set (match_operand:SSEMODEF4 0 "register_operand" "")
 	(minus:SSEMODEF4
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:SSEMODEF4 1 "register_operand" "")
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))
+	 (match_operand:SSEMODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:SSEMODEF4
+         (mult:SSEMODEF4 (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fmsub<mode>4 (operands[0], operands[1],
-				    operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -1976,22 +1954,18 @@
 (define_split
   [(set (match_operand:SSEMODEF4 0 "register_operand" "")
 	(minus:SSEMODEF4
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
+	 (match_operand:SSEMODEF4 3 "register_operand" "")
 	 (mult:SSEMODEF4
-	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	  (match_operand:SSEMODEF4 1 "memory_operand" "")
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:SSEMODEF4
+	 (match_dup 3)
+         (mult:SSEMODEF4 (match_dup 1) (match_dup 2))))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmadd<mode>4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -2034,21 +2008,19 @@
 	(minus:SSEMODEF4
 	 (mult:SSEMODEF4
 	  (neg:SSEMODEF4
-	   (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
-	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
-	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
-  "TARGET_FMA4
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+	   (match_operand:SSEMODEF4 1 "register_operand" ""))
+	  (match_operand:SSEMODEF4 2 "memory_operand" ""))
+	 (match_operand:SSEMODEF4 3 "memory_operand" "")))]
+  "TARGET_FMA4"
+  [(set (match_dup 0)
+        (minus:SSEMODEF4
+         (mult:SSEMODEF4
+	  (neg:SSEMODEF4 (match_dup 1))
+	  (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, <MODE>mode);
-  emit_insn (gen_fma4_fnmsub<mode>4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, <MODE>mode))
+    FAIL;
 })
 
 ;; For the scalar operations, use operand1 for the upper words that aren't
@@ -10346,18 +10318,14 @@
 	 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
 		    (match_operand:V8HI 2 "nonimmediate_operand" ""))
 	 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
-  "TARGET_XOP
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+  "TARGET_XOP"
+  [(set (match_dup 0)
+        (plus:V8HI
+         (mult:V8HI (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, V8HImode);
-  emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2],
-			      operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, V8HImode))
+    FAIL;
 })
 
 (define_insn "xop_pmacssww"
@@ -10394,18 +10362,14 @@
 	 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
 		    (match_operand:V4SI 2 "nonimmediate_operand" ""))
 	 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
-  "TARGET_XOP
-   && MEM_P (operands[2])
-   && (MEM_P (operands[1]) || MEM_P (operands[3]))
-   && !reg_mentioned_p (operands[0], operands[1])
-   && !reg_mentioned_p (operands[0], operands[2])
-   && !reg_mentioned_p (operands[0], operands[3])"
-  [(const_int 0)]
+  "TARGET_XOP"
+  [(set (match_dup 0)
+        (plus:V4SI
+         (mult:V4SI (match_dup 1) (match_dup 2))
+         (match_dup 3)))]
 {
-  ix86_expand_fma4_multiple_memory (operands, V4SImode);
-  emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2],
-			      operands[3]));
-  DONE;
+  if (!ix86_expand_fma4_multiple_memory (operands, V4SImode))
+    FAIL;
 })
 
 (define_insn "xop_pmacssdd"
-- 
1.6.0.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]