This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Vector permutation support for x86


On Mon, Dec 7, 2009 at 13:08, Richard Henderson <rth@redhat.com> wrote:
>> -;; Note the instruction does not allow the value being added to be a
>> memory
>> -;; operation.  However by pretending via the nonimmediate_operand
>> predicate
>> -;; that it does and splitting it later allows the following to be
>> recognized:
>> -;;     a[i] = b[i] * c[i] + d[i];
>>  (define_insn "xop_pmacsww"
>>   [(set (match_operand:V8HI 0 "register_operand" "=x")
>>         (plus:V8HI
>>         (mult:V8HI
>> -         (match_operand:V8HI 1 "register_operand" "%x")
>> +         (match_operand:V8HI 1 "nonimmediate_operand" "%x")
>>          (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
>> -        (match_operand:V8HI 3 "register_operand" "x")))]
>> +        (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
>
> I think the comment is still valuable, minus the subclause about splitting.
> Because otherwise I would question the use of nonimmediate_operand in op3.
>
> However, I do not believe that the same applies to any of the
> non-multiply-add patterns.  E.g.
>
>>  (define_insn "xop_pperm"
>>   [(set (match_operand:V16QI 0 "register_operand" "=x,x")
>>        (unspec:V16QI
>> -         [(match_operand:V16QI 1 "register_operand" "x,x")
>> +         [(match_operand:V16QI 1 "nonimmediate_operand" "x,x")
>
> There's really no reason to accept a memory operand here, AFAICS.
> Similarly with all of the patterns that follow.

Fixed like this.

	* config/i386/i386-protos.h (ix86_expand_fma4_multiple_memory):
	Removed.
	* config/i386/i386.c (ix86_expand_fma4_multiple_memory): Removed.
	* config/i386/sse.md: Remove all XOP splitters.
	Allow the second and fourth operands of XOP multiply-add insns
	to be nonimmediate.

Sebastian
From b1381c4d97b1e5c4705f8ba74808666782035d25 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Mon, 7 Dec 2009 11:38:28 -0600
Subject: [PATCH] Remove XOP splitters.

	* config/i386/i386-protos.h (ix86_expand_fma4_multiple_memory):
	Removed.
	* config/i386/i386.c (ix86_expand_fma4_multiple_memory): Removed.
	* config/i386/sse.md: Remove all XOP splitters.
	Allow the second and fourth operands of XOP multiply-add insns
	to be nonimmediate.
---
 gcc/config/i386/i386-protos.h |    2 -
 gcc/config/i386/i386.c        |   30 --------
 gcc/config/i386/sse.md        |  163 +++++++----------------------------------
 3 files changed, 27 insertions(+), 168 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index cf29cc7..aa2ccd7 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -218,8 +218,6 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int);
 extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
 extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
-extern bool ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode);
-
 extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned);
 
 /* In i386-c.c  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6cd9d7d..7cafdf6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28808,36 +28808,6 @@ ix86_expand_round (rtx operand0, rtx operand1)
 }
 
 
-/* Fixup an FMA4 or XOP instruction that has 2 memory input references
-   into a form the hardware will allow by using the destination
-   register to load one of the memory operations.  Presently this is
-   used by the multiply/add routines to allow 2 memory references.  */
-
-bool
-ix86_expand_fma4_multiple_memory (rtx operands[],
-				  enum machine_mode mode)
-{
-  rtx scratch = operands[0];
-
-  gcc_assert (register_operand (operands[0], mode));
-  gcc_assert (register_operand (operands[1], mode));
-  gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3]));
-
-  if (reg_mentioned_p (scratch, operands[1]))
-    {
-      if (!can_create_pseudo_p ())
-	return false;
-      scratch = gen_reg_rtx (mode);
-    }
-
-  emit_move_insn (scratch, operands[3]);
-  if (rtx_equal_p (operands[2], operands[3]))
-    operands[2] = operands[3] = scratch;
-  else
-    operands[3] = scratch;
-  return true;
-}
-
 /* Table of valid machine attributes.  */
 static const struct attribute_spec ix86_attribute_table[] =
 {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4e409c6..db06078 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10132,89 +10132,50 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; XOP parallel integer multiply/add instructions.
-;; Note the instruction does not allow the value being added to be a memory
-;; operation.  However by pretending via the nonimmediate_operand predicate
-;; that it does and splitting it later allows the following to be recognized:
-;;	a[i] = b[i] * c[i] + d[i];
+;; Note the XOP multiply/add instructions
+;;     a[i] = b[i] * c[i] + d[i];
+;; do not allow the value being added to be a memory operation.
 (define_insn "xop_pmacsww"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
         (plus:V8HI
 	 (mult:V8HI
-	  (match_operand:V8HI 1 "register_operand" "%x")
+	  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
 	  (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V8HI 3 "register_operand" "x")))]
+	 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-;; Split pmacsww with two memory operands into a load and the pmacsww.
-(define_split
-  [(set (match_operand:V8HI 0 "register_operand" "")
-	(plus:V8HI
-	 (mult:V8HI (match_operand:V8HI 1 "register_operand" "")
-		    (match_operand:V8HI 2 "memory_operand" ""))
-	 (match_operand:V8HI 3 "memory_operand" "")))]
-  "TARGET_XOP"
-  [(set (match_dup 0)
-        (plus:V8HI
-         (mult:V8HI (match_dup 1) (match_dup 2))
-         (match_dup 3)))]
-{
-  if (!ix86_expand_fma4_multiple_memory (operands, V8HImode))
-    FAIL;
-})
-
 (define_insn "xop_pmacssww"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
         (ss_plus:V8HI
-	 (mult:V8HI (match_operand:V8HI 1 "register_operand" "%x")
+	 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
 		    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V8HI 3 "register_operand" "x")))]
+	 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-;; Note the instruction does not allow the value being added to be a memory
-;; operation.  However by pretending via the nonimmediate_operand predicate
-;; that it does and splitting it later allows the following to be recognized:
-;;	a[i] = b[i] * c[i] + d[i];
 (define_insn "xop_pmacsdd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
         (plus:V4SI
 	 (mult:V4SI
-	  (match_operand:V4SI 1 "register_operand" "%x")
+	  (match_operand:V4SI 1 "nonimmediate_operand" "%x")
 	  (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V4SI 3 "register_operand" "x")))]
+	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-;; Split pmacsdd with two memory operands into a load and the pmacsdd.
-(define_split
-  [(set (match_operand:V4SI 0 "register_operand" "")
-	(plus:V4SI
-	 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
-		    (match_operand:V4SI 2 "memory_operand" ""))
-	 (match_operand:V4SI 3 "memory_operand" "")))]
-  "TARGET_XOP"
-  [(set (match_dup 0)
-        (plus:V4SI
-         (mult:V4SI (match_dup 1) (match_dup 2))
-         (match_dup 3)))]
-{
-  if (!ix86_expand_fma4_multiple_memory (operands, V4SImode))
-    FAIL;
-})
-
 (define_insn "xop_pmacssdd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
         (ss_plus:V4SI
-	 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
+	 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
 		    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
-	 (match_operand:V4SI 3 "register_operand" "x")))]
+	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
@@ -10226,14 +10187,14 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "register_operand" "%x")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	  (vec_select:V2SI
 	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	   (parallel [(const_int 1)
 		      (const_int 3)])))
-	 (match_operand:V2DI 3 "register_operand" "x")))]
+	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
@@ -10245,7 +10206,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "register_operand" "%x")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
@@ -10253,7 +10214,7 @@
 	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
-	 (match_operand:V2DI 3 "register_operand" "x")))]
+	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
@@ -10265,7 +10226,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "register_operand" "%x")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	  (sign_extend:V2DI
@@ -10273,47 +10234,12 @@
 	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 1)
 		       (const_int 3)]))))
-	 (match_operand:V2DI 3 "register_operand" "x")))]
+	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-(define_insn_and_split "*xop_pmacsdql_mem"
-  [(set (match_operand:V2DI 0 "register_operand" "=&x")
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "register_operand" "%x")
-	    (parallel [(const_int 1)
-		       (const_int 3)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 1)
-		       (const_int 3)]))))
-	 (match_operand:V2DI 3 "memory_operand" "m")))]
-  "TARGET_XOP"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(match_dup 3))
-   (set (match_dup 0)
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 1)
-	    (parallel [(const_int 1)
-		       (const_int 3)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 2)
-	    (parallel [(const_int 1)
-		       (const_int 3)]))))
-	 (match_dup 0)))])
-
 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
 ;; fake it with a multiply/add.  In general, we expect the define_split to
 ;; occur before register allocation, so we have to handle the corner case where
@@ -10362,7 +10288,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "register_operand" "%x")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
@@ -10370,47 +10296,12 @@
 	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
-	 (match_operand:V2DI 3 "register_operand" "x")))]
+	 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "TI")])
 
-(define_insn_and_split "*xop_pmacsdqh_mem"
-  [(set (match_operand:V2DI 0 "register_operand" "=&x")
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "register_operand" "%x")
-	    (parallel [(const_int 0)
-		       (const_int 2)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-	    (parallel [(const_int 0)
-		       (const_int 2)]))))
-	 (match_operand:V2DI 3 "memory_operand" "m")))]
-  "TARGET_XOP"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(match_dup 3))
-   (set (match_dup 0)
-	(plus:V2DI
-	 (mult:V2DI
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 1)
-	    (parallel [(const_int 0)
-		       (const_int 2)])))
-	  (sign_extend:V2DI
-	   (vec_select:V2SI
-	    (match_dup 2)
-	    (parallel [(const_int 0)
-		       (const_int 2)]))))
-	 (match_dup 0)))])
-
 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
 ;; fake it with a multiply/add.  In general, we expect the define_split to
 ;; occur before register allocation, so we have to handle the corner case where
@@ -10460,7 +10351,7 @@
 	 (mult:V4SI
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 1 "register_operand" "%x")
+	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
@@ -10472,7 +10363,7 @@
 		       (const_int 3)
 		       (const_int 5)
 		       (const_int 7)]))))
-	 (match_operand:V4SI 3 "register_operand" "x")))]
+	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
@@ -10484,7 +10375,7 @@
 	 (mult:V4SI
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 1 "register_operand" "%x")
+	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
@@ -10496,7 +10387,7 @@
 		       (const_int 3)
 		       (const_int 5)
 		       (const_int 7)]))))
-	 (match_operand:V4SI 3 "register_operand" "x")))]
+	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
@@ -10509,7 +10400,7 @@
 	  (mult:V4SI
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 1 "register_operand" "%x")
+	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
@@ -10536,7 +10427,7 @@
 			(const_int 3)
 			(const_int 5)
 			(const_int 7)])))))
-	 (match_operand:V4SI 3 "register_operand" "x")))]
+	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
@@ -10549,7 +10440,7 @@
 	  (mult:V4SI
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 1 "register_operand" "%x")
+	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
@@ -10576,7 +10467,7 @@
 			(const_int 3)
 			(const_int 5)
 			(const_int 7)])))))
-	 (match_operand:V4SI 3 "register_operand" "x")))]
+	 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
   "TARGET_XOP"
   "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
-- 
1.6.0.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]