[patch, ARM] Fix PR target/48252

Ira Rosen IRAR@il.ibm.com
Sun May 1 07:30:00 GMT 2011



Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> wrote on 07/04/2011
03:16:44 PM:

>
> On 07/04/11 08:42, Ira Rosen wrote:
> > Hi,
> >
> > This patch makes both outputs of neon_vzip/vuzp/vtrn_internal
> > explicitly dependent on both inputs, preventing incorrect
> > optimization:
> > for
> > (a,b)<- vzip (c,d)
> > and
> > (e,f)<- vzip (g,d)
> > CSE decides that b==f, since b and f depend only on d.
> >
> > Tested on arm-linux-gnueabi. OK for trunk?
>
> This is OK for trunk.
>
> > OK for 4.6 after testing?
>
> I have no objections to this going into 4.5 and 4.6 since it corrects
> the implementation of the neon intrinsics but please check with the
> release managers.

OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi?

Thanks,
Ira

4.5 and 4.6 ChangeLog:

	Backport from mainline:
	2011-04-18  Ulrich Weigand  <ulrich.weigand@linaro.org>
                  Ira Rosen  <ira.rosen@linaro.org>

	PR target/48252
	* config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
	to match neon_vzip/vuzp/vtrn_internal.
	* config/arm/neon.md (neon_vtrn<mode>_internal): Make both
	outputs explicitly dependent on both inputs.
	(neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.

testsuite/Changelog:

	Backport from mainline:
	2011-04-18  Ulrich Weigand  <ulrich.weigand@linaro.org>
                  Ira Rosen  <ira.rosen@linaro.org>

	PR target/48252
	* gcc.target/arm/pr48252.c: New test.


4.5 patch:

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 172714)
+++ config/arm/arm.c    (working copy)
@@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
   rtx tmp1 = gen_reg_rtx (mode);
   rtx tmp2 = gen_reg_rtx (mode);

-  emit_insn (intfn (tmp1, op1, tmp2, op2));
+  emit_insn (intfn (tmp1, op1, op2, tmp2));

   emit_move_insn (mem, tmp1);
   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 172714)
+++ config/arm/neon.md  (working copy)
@@ -3895,13 +3895,14 @@

 (define_insn "neon_vtrn<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VTRN2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VTRN1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+         (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VTRN2))]
   "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -3921,13 +3922,14 @@

 (define_insn "neon_vzip<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VZIP2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
   "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -3947,13 +3949,14 @@

 (define_insn "neon_vuzp<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
                      UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VUZP2))]
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
   "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
Index: testsuite/gcc.target/arm/pr48252.c
===================================================================
--- testsuite/gcc.target/arm/pr48252.c  (revision 0)
+++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main(void)
+{
+    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
+    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
+    uint8x8x2_t vd1, vd2;
+    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
+    int i;
+
+    vd1 = vzip_u8(v1, vdup_n_u8(0));
+    vd2 = vzip_u8(v2, vdup_n_u8(0));
+
+    vst1_u8(d1.buf, vd1.val[0]);
+    vst1_u8(d2.buf, vd1.val[1]);
+    vst1_u8(d3.buf, vd2.val[0]);
+    vst1_u8(d4.buf, vd2.val[1]);
+
+    for (i = 0; i < 8; i++)
+      if ((i % 2 == 0 && d4.buf[i] != 2)
+          || (i % 2 == 1 && d4.buf[i] != 0))
+         abort ();
+
+    return 0;
+}
+


4.6 patch:

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 172810)
+++ config/arm/arm.c    (working copy)
@@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode
   rtx tmp1 = gen_reg_rtx (mode);
   rtx tmp2 = gen_reg_rtx (mode);

-  emit_insn (intfn (tmp1, op1, tmp2, op2));
+  emit_insn (intfn (tmp1, op1, op2, tmp2));

   emit_move_insn (mem, tmp1);
   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md  (revision 172810)
+++ config/arm/neon.md  (working copy)
@@ -4079,13 +4079,14 @@

 (define_insn "neon_vtrn<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VTRN2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VTRN1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+         (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VTRN2))]
   "TARGET_NEON"
-  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -4105,13 +4106,14 @@

 (define_insn "neon_vzip<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
-                    UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VZIP2))]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
+                     UNSPEC_VZIP1))
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VZIP2))]
   "TARGET_NEON"
-  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
@@ -4131,13 +4133,14 @@

 (define_insn "neon_vuzp<mode>_internal"
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
-       (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")]
+        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
+                      (match_operand:VDQW 2 "s_register_operand" "w")]
                      UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=w")
-        (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")]
-                    UNSPEC_VUZP2))]
+   (set (match_operand:VDQW 3 "s_register_operand" "=2")
+        (unspec:VDQW [(match_dup 1) (match_dup 2)]
+                     UNSPEC_VUZP2))]
   "TARGET_NEON"
-  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
+  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
   [(set (attr "neon_type")
       (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
                     (const_string "neon_bp_simple")
Index: testsuite/gcc.target/arm/pr48252.c
===================================================================
--- testsuite/gcc.target/arm/pr48252.c  (revision 0)
+++ testsuite/gcc.target/arm/pr48252.c  (revision 0)
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_neon } */
+
+#include "arm_neon.h"
+#include <stdlib.h>
+
+int main(void)
+{
+    uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1};
+    uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2};
+    uint8x8x2_t vd1, vd2;
+    union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4;
+    int i;
+
+    vd1 = vzip_u8(v1, vdup_n_u8(0));
+    vd2 = vzip_u8(v2, vdup_n_u8(0));
+
+    vst1_u8(d1.buf, vd1.val[0]);
+    vst1_u8(d2.buf, vd1.val[1]);
+    vst1_u8(d3.buf, vd2.val[0]);
+    vst1_u8(d4.buf, vd2.val[1]);
+
+    for (i = 0; i < 8; i++)
+      if ((i % 2 == 0 && d4.buf[i] != 2)
+          || (i % 2 == 1 && d4.buf[i] != 0))
+         abort ();
+
+    return 0;
+}
+


>
> cheers
> Ramana
>
> >
> > Thanks,
> > Ira
> >
> > ChangeLog:
> >
> > 2011-04-07  Ulrich Weigand<ulrich.weigand@linaro.org>
> >                    Ira Rosen<ira.rosen@linaro.org>
> >
> >       PR target/48252
> >       * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments
> >       to match neon_vzip/vuzp/vtrn_internal.
> >       * config/arm/neon.md (neon_vtrn<mode>_internal): Make both
> >       outputs explicitly dependent on both inputs.
> >       (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise.
> >
> > testsuite/Changelog:
> >
> >       PR target/48252
> >       * gcc.target/arm/pr48252.c: New test.
>



More information about the Gcc-patches mailing list