This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix SSE5 pperm/perm* constraints and check_effective_target_sse5


Hi!

The gcc.target/i386/sse5-permpX.c testcase fails to assemble.  GCC generates
e.g.
permpd  %xmm1, %xmm0, src1(%rip), %xmm0
which is invalid, because the destination must match either the first or the
third source operand.  (In the AT&T operand order above, the destination
%xmm0 matches only the second source operand.)  gas also segfaults on it
after issuing an error; a patch for that has been posted to the binutils ml.

The SSE5 docs in Intel syntax say:

PPERM xmm1, xmm1, xmm2, xmm3/mem128 0F 24 23 /r /drex0 For each byte position of the 16-
                                                       byte result, uses corresponding    
PPERM xmm1, xmm1, xmm3/mem128, xmm2 0F 24 23 /r /drex1 control byte in fourth operand to  
                                                       perform logical operation on one of
PPERM xmm1, xmm2, xmm3/mem128, xmm1 0F 24 27 /r /drex0 32 bytes from the second and third
                                                       source operands and writes result
PPERM xmm1, xmm3/mem128, xmm2, xmm1 0F 24 27 /r /drex1 in destination (xmm1 register).

PERMPD xmm1, xmm1, xmm2, xmm3/mem128 0F 24 21 /r /drex0 For each double-precision result,
                                                        uses corresponding control byte
PERMPD xmm1, xmm1, xmm3/mem128, xmm2 0F 24 21 /r /drex1 in the fourth operand to perform
                                                        an operation on one of 4 double-
PERMPD xmm1, xmm2, xmm3/mem128, xmm1 0F 24 25 /r /drex0 precision operands from the
                                                        second and third source operands
PERMPD xmm1, xmm3/mem128, xmm2, xmm1 0F 24 25 /r /drex1 and writes result in destination
                                                        (xmm1 register).

so the destination must be the same as src1 or src3.  The various fmadd*
etc. constraints in sse.md honor this, but the third alternative of the pperm/perm*
insns does not: it has a dup of the destination in src2, a reg or memory in src1 and
a reg in src3.  The following patch fixes that.  I don't have any hw to actually
test it at runtime, but at least all tests in make check-gcc RUNTESTFLAGS=i386.exp
now assemble.

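The other fix is for the check_effective_target_sse5 tcl test,
which fails to compile: it casts the __builtin_ia32_pmacssww arguments to __v2di,
which isn't compatible with the __v8hi type the builtin expects.  As a result all
SSE5 runtime tests are UNSUPPORTED even when the assembler supports SSE5.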

Ok for trunk?

2007-12-29  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (sse5_pperm, sse5_pperm_pack_v2di_v4si,
	sse5_pperm_pack_v4si_v8hi, sse5_pperm_pack_v8hi_v16qi,
	sse5_perm<mode>): Fix constraints.

	* gcc.target/i386/i386.exp (check_effective_target_sse5): Use __v8hi
	rather than __v2di type.

--- gcc/config/i386/sse.md.jj	2007-12-29 20:58:15.000000000 +0100
+++ gcc/config/i386/sse.md	2007-12-29 21:12:49.000000000 +0100
@@ -8350,13 +8350,13 @@
   [(set_attr "type" "sseiadd1")])
 
 ;; SSE5 permute instructions
 (define_insn "sse5_pperm"
   [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
-	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,xm,xm")
-		       (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,0,x")
-		       (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")]
+	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
+		       (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
+		       (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
 		     UNSPEC_SSE5_PERMUTE))]
   "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
@@ -8453,52 +8453,52 @@
 ;; SSE5 pack instructions that combine two vectors into a smaller vector
 (define_insn "sse5_pperm_pack_v2di_v4si"
   [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
 	(vec_concat:V4SI
 	 (truncate:V2SI
-	  (match_operand:V2DI 1 "nonimmediate_operand" "0,0,xm,xm"))
+	  (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
 	 (truncate:V2SI
-	  (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,0,x"))))
-   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
+	  (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
   "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
 
 (define_insn "sse5_pperm_pack_v4si_v8hi"
   [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
 	(vec_concat:V8HI
 	 (truncate:V4HI
-	  (match_operand:V4SI 1 "nonimmediate_operand" "0,0,xm,xm"))
+	  (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
 	 (truncate:V4HI
-	  (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,0,x"))))
-   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
+	  (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
   "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
 
 (define_insn "sse5_pperm_pack_v8hi_v16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
 	(vec_concat:V16QI
 	 (truncate:V8QI
-	  (match_operand:V8HI 1 "nonimmediate_operand" "0,0,xm,xm"))
+	  (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
 	 (truncate:V8QI
-	  (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,0,x"))))
-   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
+	  (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
   "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
   "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "TI")])
 
 ;; Floating point permutation (permps, permpd)
 (define_insn "sse5_perm<mode>"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
 	(unspec:SSEMODEF2P
-	 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,xm,xm")
-	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,0,x")
-	  (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")]
+	 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
+	  (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
 	 UNSPEC_SSE5_PERMUTE))]
   "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
   "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "<MODE>")])
--- gcc/testsuite/gcc.target/i386/i386.exp.jj	2007-09-14 11:54:26.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/i386.exp	2007-12-29 21:42:08.000000000 +0100
@@ -68,13 +68,13 @@ proc check_effective_target_sse4a { } {
 proc check_effective_target_sse5 { } {
     return [check_no_compiler_messages sse5 object {
 	typedef long long __m128i __attribute__ ((__vector_size__ (16)));
-	typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+	typedef short __v8hi __attribute__ ((__vector_size__ (16)));
 
 	__m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
 	{
-	    return (__m128i) __builtin_ia32_pmacssww ((__v2di)__A,
-						      (__v2di)__B,
-						      (__v2di)__C);
+	    return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,
+						      (__v8hi)__B,
+						      (__v8hi)__C);
 	}
     } "-O2 -msse5" ]
 }

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]