SSE5 patches part 2

Jan Hubicka hubicka@ucw.cz
Wed Jun 18 18:33:00 GMT 2008


> + ;; Note the instruction does not allow the value being added to be a memory
> + ;; operation.  However by pretending via the nonimmediate_operand predicate
> + ;; that it does and splitting it later allows the following to be recognized:
> + ;;	a[i] = b[i] * c[i] + d[i];
> + (define_insn "*sse5_pmacsww_vector"
> +   [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
> +         (plus:V8HI (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
> + 			      (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
> + 		   (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
> +   "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false)"

Hi,
With the SSA-at-O0 patch, I am running into an ICE with the sse-14.c testcase.
Expanding:

;; D.5719 = __builtin_ia32_pmacssww (VIEW_CONVERT_EXPR<vector short int>(__A), VIEW_CONVERT_EXPR<vector short int>(__B), VIEW_CONVERT_EXPR<vector short int>(__C))
(insn 8 7 9 ./include/bmmintrin.h:139 (set (reg:V8HI 62)
        (mem/c/i:V8HI (plus:DI (reg/f:DI 54 virtual-stack-vars)
                (const_int -32 [0xffffffffffffffe0])) [0 __B+0 S16 A128])) -1 (nil))

(insn 9 8 10 ./include/bmmintrin.h:139 (set (reg:V8HI 63)
        (mem/c/i:V8HI (plus:DI (reg/f:DI 54 virtual-stack-vars)
                (const_int -48 [0xffffffffffffffd0])) [0 __C+0 S16 A128])) -1 (nil))

(insn 10 9 11 ./include/bmmintrin.h:139 (set (reg:V8HI 61)
        (ss_plus:V8HI (mult:V8HI (mem/c/i:V8HI (plus:DI (reg/f:DI 54 virtual-stack-vars)
                        (const_int -16 [0xfffffffffffffff0])) [0 __A+0 S16 A128])
                (reg:V8HI 62))
            (reg:V8HI 63))) -1 (nil))

This instruction is refused by ix86_sse5_valid_op_p because the pmacsww
instruction only has a variant with the second operand in memory.  I've
fixed it by adding an extra argument to ix86_sse5_valid_op_p specifying the
commutativity of the operation.  There turned out to be a number of
issues with constraints, operands and such across sse.md while I was
updating the patterns.  I think the whole SSE5 stuff needs a re-review for
consistency.  Some obviously non-commutative things (like sub) were
marked commutative, some commutative ones are missing commutative markers,
and some constraints allowing only registers are guarded by the
nonimmediate_operand predicate and vice versa.

It is also a bit overdesigned to use '%' in addition to explicit symmetric
constraints and multiple output templates, but the SSE expansion code is not
able to recognize those properly, so I suppose this is the easiest way
around.

If there are no complaints, I will commit this after regtest/bootstrap
finishes tomorrow.  I have already run the SSE testsuite, which should hit
most of the problems with this patch (except for execution testing, which
would be nice to have).

Honza

	* i386.md (sse5 cmov pattern): Update call of ix86_sse5_valid_op_p.
	* sse.md (sse5 patterns): Update call of ix86_sse5_valid_op_p;
	fix predicates and constraints.
	* i386.c (ix86_sse5_valid_op_p): Add commutative parameter.
	* i386-protos.h (ix86_sse5_valid_op_p): Update declaration.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 136893)
+++ config/i386/i386.md	(working copy)
@@ -19513,7 +19513,7 @@
 	  (match_operand:MODEF 1 "register_operand" "x,0")
 	  (match_operand:MODEF 2 "register_operand" "0,x")
 	  (match_operand:MODEF 3 "register_operand" "x,x")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
   [(set_attr "type" "sse4arg")])
 
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 136893)
+++ config/i386/sse.md	(working copy)
@@ -868,7 +868,7 @@
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
 	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
   "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -882,8 +882,8 @@
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
 	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
   "TARGET_SSE5
-   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
    && !reg_mentioned_p (operands[0], operands[1])
    && !reg_mentioned_p (operands[0], operands[2])
    && !reg_mentioned_p (operands[0], operands[3])"
@@ -909,7 +909,7 @@
 	 (match_dup 1)
 	 (const_int 1)))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
   "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -924,7 +924,7 @@
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
 	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
   "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -938,8 +938,8 @@
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
 	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
   "TARGET_SSE5
-   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
    && !reg_mentioned_p (operands[0], operands[1])
    && !reg_mentioned_p (operands[0], operands[2])
    && !reg_mentioned_p (operands[0], operands[3])"
@@ -965,7 +965,7 @@
 	 (match_dup 1)
 	 (const_int 1)))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -982,7 +982,7 @@
 	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
   "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -996,8 +996,8 @@
 	  (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
   "TARGET_SSE5
-   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
    && !reg_mentioned_p (operands[0], operands[1])
    && !reg_mentioned_p (operands[0], operands[2])
    && !reg_mentioned_p (operands[0], operands[3])"
@@ -1023,7 +1023,7 @@
 	 (match_dup 1)
 	 (const_int 1)))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
   "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1040,7 +1040,7 @@
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
 	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
   "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1055,8 +1055,8 @@
 	  (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
 	 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
   "TARGET_SSE5
-   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1)
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)
+   && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
    && !reg_mentioned_p (operands[0], operands[1])
    && !reg_mentioned_p (operands[0], operands[2])
    && !reg_mentioned_p (operands[0], operands[3])"
@@ -1083,7 +1083,7 @@
 	 (match_dup 1)
 	 (const_int 1)))]
   "TARGET_SSE5 && TARGET_FUSED_MADD
-   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)"
+   && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
   "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1121,7 +1121,7 @@
 	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
 	   (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
   "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1151,11 +1151,11 @@
 	(unspec:SSEMODEF2P
 	 [(minus:SSEMODEF2P
 	   (mult:SSEMODEF2P
-	    (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm")
+	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
 	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
 	   (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
   "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1191,7 +1191,7 @@
 	    (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
 	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
   "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1224,11 +1224,11 @@
 	 [(minus:SSEMODEF2P
 	   (mult:SSEMODEF2P
 	    (neg:SSEMODEF2P
-	     (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm"))
+	     (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
 	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
 	   (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
@@ -1271,7 +1271,7 @@
 	   (match_dup 0)
 	   (const_int 0))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -1305,13 +1305,13 @@
 	 [(vec_merge:SSEMODEF2P
 	   (minus:SSEMODEF2P
 	    (mult:SSEMODEF2P
-	     (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+	     (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
 	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
 	    (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	   (match_dup 1)
 	   (const_int 1))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -1347,12 +1347,12 @@
 	   (minus:SSEMODEF2P
 	    (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
 	    (mult:SSEMODEF2P
-	     (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+	     (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
 	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
 	   (match_dup 1)
 	   (const_int 1))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
   "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -1388,13 +1388,13 @@
 	   (minus:SSEMODEF2P
 	    (mult:SSEMODEF2P
 	     (neg:SSEMODEF2P
-	      (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
+	      (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
 	     (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
 	    (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
 	   (match_dup 1)
 	   (const_int 1))]
 	 UNSPEC_SSE5_INTRINSIC))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<ssescalarmode>")])
@@ -6842,10 +6842,10 @@
   [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
         (plus:V8HI
 	 (mult:V8HI
-	  (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
-	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
-	 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
+	  (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
+	  (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
+	 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
   "@
    pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -6861,8 +6861,8 @@
 		    (match_operand:V8HI 2 "nonimmediate_operand" ""))
 	 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
   "TARGET_SSE5
-   && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
-   && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
+   && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
+   && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
    && !reg_mentioned_p (operands[0], operands[1])
    && !reg_mentioned_p (operands[0], operands[2])
    && !reg_mentioned_p (operands[0], operands[3])"
@@ -6879,8 +6879,8 @@
         (ss_plus:V8HI
 	 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
 		    (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
-	 (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+	 (match_operand:V8HI 3 "register_operand" "0,0,0")))]
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -6898,8 +6898,8 @@
 	 (mult:V4SI
 	  (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
-	 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)"
+	 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
   "@
    pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -6915,8 +6915,8 @@
 		    (match_operand:V4SI 2 "nonimmediate_operand" ""))
 	 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
   "TARGET_SSE5
-   && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1)
-   && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)
+   && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
+   && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
    && !reg_mentioned_p (operands[0], operands[1])
    && !reg_mentioned_p (operands[0], operands[2])
    && !reg_mentioned_p (operands[0], operands[3])"
@@ -6933,8 +6933,8 @@
         (ss_plus:V4SI
 	 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 		    (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
-	 (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+	 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -6948,7 +6948,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	   (vec_select:V2SI
@@ -6956,7 +6956,7 @@
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -6970,7 +6970,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
@@ -6979,7 +6979,7 @@
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
 	 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -6993,7 +6993,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	  (sign_extend:V2DI
@@ -7002,7 +7002,7 @@
 	    (parallel [(const_int 1)
 		       (const_int 3)]))))
 	 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -7016,7 +7016,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 1)
 		       (const_int 3)])))
 	  (sign_extend:V2DI
@@ -7025,7 +7025,7 @@
 	    (parallel [(const_int 1)
 		       (const_int 3)]))))
 	 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
   "#"
   "&& (reload_completed
        || (!reg_mentioned_p (operands[0], operands[1])
@@ -7097,7 +7097,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
@@ -7106,7 +7106,7 @@
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
 	 (match_operand:V2DI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -7120,7 +7120,7 @@
 	 (mult:V2DI
 	  (sign_extend:V2DI
 	   (vec_select:V2SI
-	    (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 0)
 		       (const_int 2)])))
 	  (sign_extend:V2DI
@@ -7129,7 +7129,7 @@
 	    (parallel [(const_int 0)
 		       (const_int 2)]))))
 	 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
   "#"
   "&& (reload_completed
        || (!reg_mentioned_p (operands[0], operands[1])
@@ -7202,7 +7202,7 @@
 	 (mult:V4SI
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
@@ -7215,7 +7215,7 @@
 		       (const_int 5)
 		       (const_int 7)]))))
 	 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -7229,7 +7229,7 @@
 	 (mult:V4SI
 	  (sign_extend:V4SI
 	   (vec_select:V4HI
-	    (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
+	    (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
 	    (parallel [(const_int 1)
 		       (const_int 3)
 		       (const_int 5)
@@ -7242,7 +7242,7 @@
 		       (const_int 5)
 		       (const_int 7)]))))
 	 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -7257,7 +7257,7 @@
 	  (mult:V4SI
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
+	     (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
@@ -7285,7 +7285,7 @@
 			(const_int 5)
 			(const_int 7)])))))
 	 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -7300,7 +7300,7 @@
 	  (mult:V4SI
 	   (sign_extend:V4SI
 	    (vec_select:V4HI
-	     (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")
+	     (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
 	     (parallel [(const_int 0)
 			(const_int 2)
 			(const_int 4)
@@ -7328,7 +7328,7 @@
 			(const_int 5)
 			(const_int 7)])))))
 	 (match_operand:V4SI 3 "register_operand" "0,0,0")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
   "@
    pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -7343,7 +7343,7 @@
 	  (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
 	  (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
 	  (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "@
    pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
    pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
@@ -7799,7 +7799,7 @@
 	   (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
 	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
 	  UNSPEC_SSE5_PERMUTE))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
@@ -7902,7 +7902,7 @@
 	 (truncate:V2SI
 	  (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
    (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
@@ -7915,7 +7915,7 @@
 	 (truncate:V4HI
 	  (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
    (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
@@ -7928,7 +7928,7 @@
 	 (truncate:V8QI
 	  (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
    (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
@@ -7941,7 +7941,7 @@
 	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
 	  (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
 	 UNSPEC_SSE5_PERMUTE))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "<MODE>")])
@@ -8068,7 +8068,7 @@
 	 (rotatert:SSEMODE1248
 	  (match_dup 1)
 	  (neg:SSEMODE1248 (match_dup 2)))))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
   "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
    (set_attr "mode" "TI")])
@@ -8121,7 +8121,7 @@
 	 (ashiftrt:SSEMODE1248
 	  (match_dup 1)
 	  (neg:SSEMODE1248 (match_dup 2)))))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
   "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
    (set_attr "mode" "TI")])
@@ -8138,7 +8138,7 @@
 	 (lshiftrt:SSEMODE1248
 	  (match_dup 1)
 	  (neg:SSEMODE1248 (match_dup 2)))))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)"
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
   "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
    (set_attr "mode" "TI")])
Index: config/i386/i386-protos.h
===================================================================
--- config/i386/i386-protos.h	(revision 136893)
+++ config/i386/i386-protos.h	(working copy)
@@ -209,7 +209,7 @@
 extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
 extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
 
-extern bool ix86_sse5_valid_op_p (rtx [], rtx, int, bool, int);
+extern bool ix86_sse5_valid_op_p (rtx [], rtx, int, bool, int, bool);
 extern void ix86_expand_sse5_multiple_memory (rtx [], int, enum machine_mode);
 
 /* In winnt.c  */
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 136893)
+++ config/i386/i386.c	(working copy)
@@ -25643,11 +25643,12 @@
    OPERANDS is the array of operands.
    NUM is the number of operands.
    USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
-   NUM_MEMORY is the maximum number of memory operands to accept.  */
+   NUM_MEMORY is the maximum number of memory operands to accept.  
+   when COMMUTATIVE is set, operand 1 and 2 can be swapped.  */
 
 bool
 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
-		      bool uses_oc0, int num_memory)
+		      bool uses_oc0, int num_memory, bool commutative)
 {
   int mem_mask;
   int mem_count;
@@ -25731,6 +25732,8 @@
 
       /* format, example pmacsdd:
 	 xmm1, xmm2, xmm3/mem, xmm1 */
+      if (commutative)
+	return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
       else
 	return (mem_mask == (1 << 2));
     }
@@ -25765,6 +25768,8 @@
 
          For the integer multiply/add instructions be more restrictive and
          require operands[2] and operands[3] to be the memory operands.  */
+      if (commutative)
+	return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
       else
 	return (mem_mask == ((1 << 2) | (1 << 3)));
     }



More information about the Gcc-patches mailing list