This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix SSE5 pperm/perm* constraints and check_effective_target_sse5
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Michael Meissner <michael dot meissner at amd dot com>, Uros Bizjak <ubizjak at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Sat, 29 Dec 2007 16:07:06 -0500
- Subject: [PATCH] Fix SSE5 pperm/perm* constraints and check_effective_target_sse5
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
The gcc.target/i386/sse5-permpX.c testcase fails to assemble: GCC generates
e.g.
permpd %xmm1, %xmm0, src1(%rip), %xmm0
which is invalid, because destination must match either first or third
source operand (and gas segfaults on it after issuing an error, patch
for that posted to binutils ml).
The SSE5 docs in Intel syntax say:
PPERM xmm1, xmm1, xmm2, xmm3/mem128    0F 24 23 /r /drex0
PPERM xmm1, xmm1, xmm3/mem128, xmm2    0F 24 23 /r /drex1
PPERM xmm1, xmm2, xmm3/mem128, xmm1    0F 24 27 /r /drex0
PPERM xmm1, xmm3/mem128, xmm2, xmm1    0F 24 27 /r /drex1
    For each byte position of the 16-byte result, uses corresponding
    control byte in fourth operand to perform logical operation on one of
    32 bytes from the second and third source operands and writes result
    in destination (xmm1 register).
PERMPD xmm1, xmm1, xmm2, xmm3/mem128   0F 24 21 /r /drex0
PERMPD xmm1, xmm1, xmm3/mem128, xmm2   0F 24 21 /r /drex1
PERMPD xmm1, xmm2, xmm3/mem128, xmm1   0F 24 25 /r /drex0
PERMPD xmm1, xmm3/mem128, xmm2, xmm1   0F 24 25 /r /drex1
    For each double-precision result, uses corresponding control byte
    in the fourth operand to perform an operation on one of 4 double-
    precision operands from the second and third source operands
    and writes result in destination (xmm1 register).
so the destination must be the same as src1 or src3. The various fmadd*
etc. constraints in sse.md honor this, but the third alternative for the
pperm/perm* insns does not: it has a dup of the destination in src2, a register
or memory in src1, and a register in src3. The following patch fixes that.
I don't have any hardware to actually test it at runtime, but at least all tests
in make check-gcc RUNTESTFLAGS=i386.exp now assemble.
The other fix is for the check_effective_target_sse5 Tcl test,
which fails (__v2di isn't compatible with __v8hi), so all SSE5 runtime tests
are UNSUPPORTED even when the assembler supports SSE5.
Ok for trunk?
2007-12-29 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (sse5_pperm, sse5_pperm_pack_v2di_v4si,
sse5_pperm_pack_v4si_v8hi, sse5_pperm_pack_v8hi_v16qi,
sse5_perm<mode>): Fix constraints.
* gcc.target/i386/i386.exp (check_effective_target_sse5): Use __v8hi
rather than __v2di type.
--- gcc/config/i386/sse.md.jj 2007-12-29 20:58:15.000000000 +0100
+++ gcc/config/i386/sse.md 2007-12-29 21:12:49.000000000 +0100
@@ -8350,13 +8350,13 @@
[(set_attr "type" "sseiadd1")])
;; SSE5 permute instructions
(define_insn "sse5_pperm"
[(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
- (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,xm,xm")
- (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,0,x")
- (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")]
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
UNSPEC_SSE5_PERMUTE))]
"TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
"pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")
(set_attr "mode" "TI")])
@@ -8453,52 +8453,52 @@
;; SSE5 pack instructions that combine two vectors into a smaller vector
(define_insn "sse5_pperm_pack_v2di_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
(vec_concat:V4SI
(truncate:V2SI
- (match_operand:V2DI 1 "nonimmediate_operand" "0,0,xm,xm"))
+ (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
(truncate:V2SI
- (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,0,x"))))
- (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
+ (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
"TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
"pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")
(set_attr "mode" "TI")])
(define_insn "sse5_pperm_pack_v4si_v8hi"
[(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
(vec_concat:V8HI
(truncate:V4HI
- (match_operand:V4SI 1 "nonimmediate_operand" "0,0,xm,xm"))
+ (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
(truncate:V4HI
- (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,0,x"))))
- (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
"TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
"pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")
(set_attr "mode" "TI")])
(define_insn "sse5_pperm_pack_v8hi_v16qi"
[(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
(vec_concat:V16QI
(truncate:V8QI
- (match_operand:V8HI 1 "nonimmediate_operand" "0,0,xm,xm"))
+ (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
(truncate:V8QI
- (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,0,x"))))
- (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))]
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
"TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
"pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")
(set_attr "mode" "TI")])
;; Floating point permutation (permps, permpd)
(define_insn "sse5_perm<mode>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
(unspec:SSEMODEF2P
- [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,xm,xm")
- (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,0,x")
- (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")]
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
UNSPEC_SSE5_PERMUTE))]
"TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
"perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")
(set_attr "mode" "<MODE>")])
--- gcc/testsuite/gcc.target/i386/i386.exp.jj 2007-09-14 11:54:26.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/i386.exp 2007-12-29 21:42:08.000000000 +0100
@@ -68,13 +68,13 @@ proc check_effective_target_sse4a { } {
proc check_effective_target_sse5 { } {
return [check_no_compiler_messages sse5 object {
typedef long long __m128i __attribute__ ((__vector_size__ (16)));
- typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+ typedef short __v8hi __attribute__ ((__vector_size__ (16)));
__m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i) __builtin_ia32_pmacssww ((__v2di)__A,
- (__v2di)__B,
- (__v2di)__C);
+ return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,
+ (__v8hi)__B,
+ (__v8hi)__C);
}
} "-O2 -msse5" ]
}
Jakub