This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]; Fix PR target/37191


Hello!

The comment above inline_secondary_memory_needed() says:

The macro can't work reliably when one of the CLASSES is class containing
registers from multiple units (SSE, MMX, integer). We avoid this by never
combining those units in single alternative in the machine description.
Ensure that this constraint holds to avoid unexpected surprises.


Attached patch splits alternatives in several insn patterns to avoid "unexpected surprises".

2008-08-22 Uros Bizjak <ubizjak@gmail.com>

   PR target/37191
   * config/i386/mmx.md (*vec_extractv2sf_0): Avoid combining registers
   from different units in a single alternative.
   (*vec_extractv2sf_1): Ditto.
   (*vec_extractv2si_0): Ditto.
   (*vec_extractv2si_1): Ditto.
   * config/i386/sse.md (sse2_storehpd): Ditto.
   (sse2_storelpd): Ditto.
   (sse2_loadhpd): Ditto.
   (sse2_loadlpd): Ditto.

testsuite/ChangeLog:

2008-08-22 Uros Bizjak <ubizjak@gmail.com>

   PR target/37191
   * gcc.target/i386/pr37191.c: New test.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32} on 4.3 branch and mainline SVN. Tha patch was committed to mainline, although the bug is latent there. The patch will be backported to 4.3 branch when the branch opens.

Uros.

Index: testsuite/gcc.target/i386/pr37191.c
===================================================================
--- testsuite/gcc.target/i386/pr37191.c	(revision 0)
+++ testsuite/gcc.target/i386/pr37191.c	(revision 0)
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -mmmx" } */
+
+#include <mmintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+extern const uint64_t ff_bone;
+
+static inline void transpose4x4(uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride) {
+    __m64 row0 = _mm_cvtsi32_si64(*(unsigned*)(src + (0 * src_stride)));
+    __m64 row1 = _mm_cvtsi32_si64(*(unsigned*)(src + (1 * src_stride)));
+    __m64 row2 = _mm_cvtsi32_si64(*(unsigned*)(src + (2 * src_stride)));
+    __m64 row3 = _mm_cvtsi32_si64(*(unsigned*)(src + (3 * src_stride)));
+    __m64 tmp0 = _mm_unpacklo_pi8(row0, row1);
+    __m64 tmp1 = _mm_unpacklo_pi8(row2, row3);
+    __m64 row01 = _mm_unpacklo_pi16(tmp0, tmp1);
+    __m64 row23 = _mm_unpackhi_pi16(tmp0, tmp1);
+    *((unsigned*)(dst + (0 * dst_stride))) = _mm_cvtsi64_si32(row01);
+    *((unsigned*)(dst + (1 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row01, row01));
+    *((unsigned*)(dst + (2 * dst_stride))) = _mm_cvtsi64_si32(row23);
+    *((unsigned*)(dst + (3 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row23, row23));
+}
+
+static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1)
+{
+    asm volatile(
+        ""
+        :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
+           "m"(alpha1), "m"(beta1), "m"(ff_bone)
+    );
+}
+
+void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
+{
+
+    uint8_t trans[8*4] __attribute__ ((aligned (8)));
+    transpose4x4(trans, pix-2, 8, stride);
+    transpose4x4(trans+4, pix-2+4*stride, 8, stride);
+    h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
+    transpose4x4(pix-2, trans, stride, 8);
+    transpose4x4(pix-2+4*stride, trans+4, stride, 8);
+}
Index: config/i386/mmx.md
===================================================================
--- config/i386/mmx.md	(revision 139433)
+++ config/i386/mmx.md	(working copy)
@@ -535,10 +535,12 @@
   DONE;
 })
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn_and_split "*vec_extractv2sf_0"
-  [(set (match_operand:SF 0 "nonimmediate_operand"     "=x,y,m,m,frxy")
+  [(set (match_operand:SF 0 "nonimmediate_operand"     "=x, m,y ,m,f,r")
 	(vec_select:SF
-	  (match_operand:V2SF 1 "nonimmediate_operand" " x,y,x,y,m")
+	  (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
 	  (parallel [(const_int 0)])))]
   "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
@@ -554,18 +556,23 @@
   DONE;
 })
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "*vec_extractv2sf_1"
-  [(set (match_operand:SF 0 "nonimmediate_operand"     "=y,x,frxy")
+  [(set (match_operand:SF 0 "nonimmediate_operand"     "=y,x,y,x,f,r")
 	(vec_select:SF
-	  (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o")
+	  (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o")
 	  (parallel [(const_int 1)])))]
   "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    punpckhdq\t%0, %0
    unpckhps\t%0, %0
+   #
+   #
+   #
    #"
-  [(set_attr "type" "mmxcvt,sselog1,*")
-   (set_attr "mode" "DI,V4SF,SI")])
+  [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov")
+   (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")])
 
 (define_split
   [(set (match_operand:SF 0 "register_operand" "")
@@ -1214,10 +1221,12 @@
   DONE;
 })
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn_and_split "*vec_extractv2si_0"
-  [(set (match_operand:SI 0 "nonimmediate_operand"     "=x,y,m,m,frxy")
+  [(set (match_operand:SI 0 "nonimmediate_operand"     "=x,m,y, m,r")
 	(vec_select:SI
-	  (match_operand:V2SI 1 "nonimmediate_operand" " x,y,x,y,m")
+	  (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
 	  (parallel [(const_int 0)])))]
   "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
@@ -1233,10 +1242,12 @@
   DONE;
 })
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "*vec_extractv2si_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand"     "=y,Y2,Y2,x,frxy")
+  [(set (match_operand:SI 0 "nonimmediate_operand"     "=y,Y2,Y2,x,y,x,r")
 	(vec_select:SI
-	  (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Y2,0,o")
+	  (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Y2,0,o,o,o")
 	  (parallel [(const_int 1)])))]
   "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
@@ -1244,9 +1255,11 @@
    punpckhdq\t%0, %0
    pshufd\t{$85, %1, %0|%0, %1, 85}
    unpckhps\t%0, %0
+   #
+   #
    #"
-  [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,*")
-   (set_attr "mode" "DI,TI,TI,V4SF,SI")])
+  [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov")
+   (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")])
 
 (define_split
   [(set (match_operand:SI 0 "register_operand" "")
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 139433)
+++ config/i386/sse.md	(working copy)
@@ -2404,9 +2404,9 @@
 })
 
 (define_insn_and_split "*vec_extractv4sf_0"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
 	(vec_select:SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
 	  (parallel [(const_int 0)])))]
   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "#"
@@ -2659,18 +2659,22 @@
   [(set_attr "type" "sselog")
    (set_attr "mode" "V2DF")])
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "sse2_storehpd"
-  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
+  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
 	(vec_select:DF
-	  (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
+	  (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
 	  (parallel [(const_int 1)])))]
   "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    movhpd\t{%1, %0|%0, %1}
    unpckhpd\t%0, %0
+   #
+   #
    #"
-  [(set_attr "type" "ssemov,sselog1,ssemov")
-   (set_attr "mode" "V1DF,V2DF,DF")])
+  [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
+   (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
 
 (define_split
   [(set (match_operand:DF 0 "register_operand" "")
@@ -2683,18 +2687,22 @@
   operands[1] = adjust_address (operands[1], DFmode, 8);
 })
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "sse2_storelpd"
-  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
+  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
 	(vec_select:DF
-	  (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
+	  (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
 	  (parallel [(const_int 0)])))]
   "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    movlpd\t{%1, %0|%0, %1}
    #
+   #
+   #
    #"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V1DF,DF,DF")])
+  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
+   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
 
 (define_split
   [(set (match_operand:DF 0 "register_operand" "")
@@ -2723,21 +2731,25 @@
   "TARGET_SSE2"
   "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "sse2_loadhpd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o")
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o,o,o")
 	(vec_concat:V2DF
 	  (vec_select:DF
-	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
+	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
 	    (parallel [(const_int 0)]))
-	  (match_operand:DF 2 "nonimmediate_operand"     " m,x,0,x*fr")))]
+	  (match_operand:DF 2 "nonimmediate_operand"     " m,x,0,x,*f,r")))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
    movhpd\t{%2, %0|%0, %2}
    unpcklpd\t{%2, %0|%0, %2}
    shufpd\t{$1, %1, %0|%0, %1, 1}
+   #
+   #
    #"
-  [(set_attr "type" "ssemov,sselog,sselog,other")
-   (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
+  [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
+   (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
 
 (define_split
   [(set (match_operand:V2DF 0 "memory_operand" "")
@@ -2760,12 +2772,14 @@
   "TARGET_SSE2"
   "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
 
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
 (define_insn "sse2_loadlpd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
+  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m,m,m")
 	(vec_concat:V2DF
-	  (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
+	  (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x,*f,r")
 	  (vec_select:DF
-	    (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
+	    (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
 	    (parallel [(const_int 1)]))))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
@@ -2774,9 +2788,11 @@
    movsd\t{%2, %0|%0, %2}
    shufpd\t{$2, %2, %0|%0, %2, 2}
    movhpd\t{%H1, %0|%0, %H1}
+   #
+   #
    #"
-  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
-   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
+  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
+   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
 
 (define_split
   [(set (match_operand:V2DF 0 "memory_operand" "")

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]