This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, i386]: Committed: Fix PR target/22152
Richard Guenther wrote:
I realize this may be hard, but with all the many tweaking patches for SSE, MMX,
etc. how do we make sure to not regress in cases we fixed earlier? So, may I
suggest you try to enter at least "something" into the testsuite? For example
scan-assembler-not ".L6.*ebp.*.L6" (no stack operations between the .L6 loop
entry and the backedge)? Maybe a little bit fragile, but at least
some confidence
would be there (and some testcases to eventually manually inspect) that
we won't regress again?
Heh, I _did_ say that "... The testcase will be committed in a separate
commit, as I have to clean it a bit."
Attached to this message, please find a couple of testcases, derived
from PR 22152:
- pr22152.c is a compile-time test that checks that a (long long) value
stays inside an MMX register. Due to the cast to (long long) in
mmintrin.h, the value was dragged to and from memory (-O2 -m32 -msse2):
.L3:
movl (%ebx,%eax,8), %esi
movl 4(%ebx,%eax,8), %edi
movl %esi, -24(%ebp)
movl %edi, -20(%ebp)
movq -24(%ebp), %mm0
paddq (%ecx,%eax,8), %mm0
addl $1, %eax
cmpl %eax, %edx
movq %mm0, -24(%ebp)
movq -24(%ebp), %mm0
ja .L3
The situation is now much better:
.L3:
movq (%ebx,%eax,8), %mm0
paddq (%ecx,%eax,8), %mm0
addl $1, %eax
cmpl %eax, %edx
ja .L3
- sse2-mmx.c
This is a runtime test, based on the large testcase from the PR. The
test should add two huge numbers together using MMX operations, but
unfortunately, it doesn't work correctly (its carry-propagation logic is
fatally flawed). The attached test fixes this logic, so it can be used to
increase the runtime coverage of SSE2-based MMX operations.
FWIW, the loop from the testcase is now:
.L3:
movq (%esi,%eax,8), %mm2 #* a, _a.37
movq (%ebx,%eax,8), %mm3 #* b, _b
movq %mm2, %mm0 # D.2452, tmp94
paddq %mm3, %mm0 # D.2451, tmp94
movq %mm2, %mm1 # _a.37, D.2452
movq %mm3, %mm4 # _b, D.2451
paddq %mm5, %mm0 # carry, tmp94
psrlq $1, %mm1 #, D.2452
movq %mm0, (%ecx,%eax,8) # tmp94,* result
movq %mm2, %mm0 # _a.37, tmp96
pxor %mm3, %mm0 # _b, tmp96
pand %mm5, %mm0 # carry, tmp96
pand %mm3, %mm2 # _b, _a.37
por %mm0, %mm2 # tmp96, _a.37
psrlq $1, %mm4 #, D.2451
pand %mm6, %mm2 # one.38, _a.37
paddq %mm4, %mm1 # D.2451, D.2452
paddq %mm2, %mm1 # _a.37, D.2452
addl $1, %eax #, i
psrlq $63, %mm1 #, D.2452
cmpl %eax, %edx # i, count
movq %mm1, %mm5 # D.2452, carry
ja .L3 #,
.L2:
Other than that, previous changes to MMX patterns are covered by
pr22076.c, pr34256.c. In addition, all vecinit-N.c tests check that no
MMX register is used in vector initialization code (we had some problems
with this in the past).
2008-03-08 Uros Bizjak <ubizjak@gmail.com>
PR target/22152
* gcc.target/i386/pr22152.c: New test.
* gcc.target/i386/sse2-mmx.c: Ditto.
These new tests were checked on x86_64-linux-gnu {,-m32} and are
committed to mainline.
Uros.
Index: gcc.target/i386/sse2-mmx.c
===================================================================
--- gcc.target/i386/sse2-mmx.c (revision 0)
+++ gcc.target/i386/sse2-mmx.c (revision 0)
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+#include <mmintrin.h>
+
+#define N 4
+
+unsigned long long a[N], b[N], result[N];
+
+unsigned long long check[N] =
+ { 0x101010101010100full,
+ 0x1010101010101010ull,
+ 0x1010101010101010ull,
+ 0x1010101010101010ull };
+
+__m64
+unsigned_add3 (const __m64 * a, const __m64 * b,
+ __m64 * result, unsigned int count)
+{
+ __m64 _a, _b, one, sum, carry, onesCarry;
+
+ unsigned int i;
+
+ one = _mm_cmpeq_pi8 (_a, _a);
+ one = _mm_sub_si64 (_mm_xor_si64 (one, one), one);
+
+ carry = _mm_xor_si64 (one, one);
+
+ for (i = 0; i < count; i++)
+ {
+ _a = a[i];
+ _b = b[i];
+
+ sum = _mm_add_si64 (_a, _b);
+ sum = _mm_add_si64 (sum, carry);
+
+ result[i] = sum;
+
+ onesCarry = _mm_and_si64 (_mm_xor_si64 (_a, _b), carry);
+ onesCarry = _mm_or_si64 (_mm_and_si64 (_a, _b), onesCarry);
+ onesCarry = _mm_and_si64 (onesCarry, one);
+
+ _a = _mm_srli_si64 (_a, 1);
+ _b = _mm_srli_si64 (_b, 1);
+
+ carry = _mm_add_si64 (_mm_add_si64 (_a, _b), onesCarry);
+ carry = _mm_srli_si64 (carry, 63);
+ }
+
+ _mm_empty ();
+ return carry;
+}
+
+void __attribute__((noinline))
+sse2_test (void)
+{
+ unsigned long long carry;
+ int i;
+
+ /* Really long numbers. */
+ a[3] = a[2] = a[1] = a[0] = 0xd3d3d3d3d3d3d3d3ull;
+ b[3] = b[2] = b[1] = b[0] = 0x3c3c3c3c3c3c3c3cull;
+
+ carry = (unsigned long long) unsigned_add3
+ ((__m64 *)a, (__m64 *)b, (__m64 *)result, N);
+
+ if (carry != 1)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ if (result [i] != check[i])
+ abort ();
+}
Index: gcc.target/i386/pr22152.c
===================================================================
--- gcc.target/i386/pr22152.c (revision 0)
+++ gcc.target/i386/pr22152.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+#include <mmintrin.h>
+
+__m64
+unsigned_add3 (const __m64 * a, const __m64 * b, unsigned long count)
+{
+ __m64 sum;
+ unsigned int i;
+
+ for (i = 1; i < count; i++)
+ sum = _mm_add_si64 (a[i], b[i]);
+
+ return sum;
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%mm" 1 } } */