This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, i386]: Committed: Fix PR target/22152
Richard Guenther wrote:
I realize this may be hard, but with all the many tweaking patches for SSE, MMX,
etc. how do we make sure to not regress in cases we fixed earlier? So, may I
suggest you try to enter at least "something" into the testsuite? For example
scan-assembler-not ".L6.*ebp.*.L6" (no stack operations between the .L6 loop
entry and the backedge)? Maybe a little bit fragile, but at least
some confidence
would be there (and some testcases to eventually manually inspect) that
we won't regress again?
Heh, I _did_ say that "... The testcase will be committed in a separate
commit, as I have to clean it a bit."
Attached to this message, please find a couple of testcases, derived
from PR 22152:
- pr22152.c is a compile-time test that checks that a (long long) value
stays inside an MMX register. Due to the cast to (long long) in
mmintrin.h, the value was dragged to and from memory (-O2 -m32 -msse2):
.L3:
movl (%ebx,%eax,8), %esi
movl 4(%ebx,%eax,8), %edi
movl %esi, -24(%ebp)
movl %edi, -20(%ebp)
movq -24(%ebp), %mm0
paddq (%ecx,%eax,8), %mm0
addl $1, %eax
cmpl %eax, %edx
movq %mm0, -24(%ebp)
movq -24(%ebp), %mm0
ja .L3
The situation is now much better:
.L3:
movq (%ebx,%eax,8), %mm0
paddq (%ecx,%eax,8), %mm0
addl $1, %eax
cmpl %eax, %edx
ja .L3
- sse2-mmx.c
This is a runtime test, based on the large testcase from the PR. The
test should add two huge numbers together using MMX operations, but
unfortunately, it doesn't work correctly (its carry-propagation logic is
fatally flawed). The attached test fixes this logic, so it can be used to
increase the runtime coverage of SSE2-based MMX operations.
FWIW, the loop from the testcase is now:
.L3:
movq (%esi,%eax,8), %mm2 #* a, _a.37
movq (%ebx,%eax,8), %mm3 #* b, _b
movq %mm2, %mm0 # D.2452, tmp94
paddq %mm3, %mm0 # D.2451, tmp94
movq %mm2, %mm1 # _a.37, D.2452
movq %mm3, %mm4 # _b, D.2451
paddq %mm5, %mm0 # carry, tmp94
psrlq $1, %mm1 #, D.2452
movq %mm0, (%ecx,%eax,8) # tmp94,* result
movq %mm2, %mm0 # _a.37, tmp96
pxor %mm3, %mm0 # _b, tmp96
pand %mm5, %mm0 # carry, tmp96
pand %mm3, %mm2 # _b, _a.37
por %mm0, %mm2 # tmp96, _a.37
psrlq $1, %mm4 #, D.2451
pand %mm6, %mm2 # one.38, _a.37
paddq %mm4, %mm1 # D.2451, D.2452
paddq %mm2, %mm1 # _a.37, D.2452
addl $1, %eax #, i
psrlq $63, %mm1 #, D.2452
cmpl %eax, %edx # i, count
movq %mm1, %mm5 # D.2452, carry
ja .L3 #,
.L2:
Other than that, previous changes to MMX patterns are covered by
pr22076.c, pr34256.c. In addition, all vecinit-N.c tests check that no
MMX register is used in vector initialization code (we had some problems
with this in the past).
2008-03-08 Uros Bizjak <ubizjak@gmail.com>
PR target/22152
* gcc.target/i386/pr22152.c: New test.
* gcc.target/i386/sse2-mmx.c: Ditto.
These new tests were checked on x86_64-linux-gnu {,-m32} and are
committed to mainline.
Uros.
Index: gcc.target/i386/sse2-mmx.c
===================================================================
--- gcc.target/i386/sse2-mmx.c (revision 0)
+++ gcc.target/i386/sse2-mmx.c (revision 0)
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "sse2-check.h"
+
+#include <mmintrin.h>
+
+#define N 4
+
+unsigned long long a[N], b[N], result[N];
+
+unsigned long long check[N] =
+ { 0x101010101010100full,
+ 0x1010101010101010ull,
+ 0x1010101010101010ull,
+ 0x1010101010101010ull };
+
+__m64
+unsigned_add3 (const __m64 * a, const __m64 * b,
+ __m64 * result, unsigned int count)
+{
+ __m64 _a, _b, one, sum, carry, onesCarry;
+
+ unsigned int i;
+
+ one = _mm_cmpeq_pi8 (_a, _a);
+ one = _mm_sub_si64 (_mm_xor_si64 (one, one), one);
+
+ carry = _mm_xor_si64 (one, one);
+
+ for (i = 0; i < count; i++)
+ {
+ _a = a[i];
+ _b = b[i];
+
+ sum = _mm_add_si64 (_a, _b);
+ sum = _mm_add_si64 (sum, carry);
+
+ result[i] = sum;
+
+ onesCarry = _mm_and_si64 (_mm_xor_si64 (_a, _b), carry);
+ onesCarry = _mm_or_si64 (_mm_and_si64 (_a, _b), onesCarry);
+ onesCarry = _mm_and_si64 (onesCarry, one);
+
+ _a = _mm_srli_si64 (_a, 1);
+ _b = _mm_srli_si64 (_b, 1);
+
+ carry = _mm_add_si64 (_mm_add_si64 (_a, _b), onesCarry);
+ carry = _mm_srli_si64 (carry, 63);
+ }
+
+ _mm_empty ();
+ return carry;
+}
+
+void __attribute__((noinline))
+sse2_test (void)
+{
+ unsigned long long carry;
+ int i;
+
+ /* Really long numbers. */
+ a[3] = a[2] = a[1] = a[0] = 0xd3d3d3d3d3d3d3d3ull;
+ b[3] = b[2] = b[1] = b[0] = 0x3c3c3c3c3c3c3c3cull;
+
+ carry = (unsigned long long) unsigned_add3
+ ((__m64 *)a, (__m64 *)b, (__m64 *)result, N);
+
+ if (carry != 1)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ if (result [i] != check[i])
+ abort ();
+}
Index: gcc.target/i386/pr22152.c
===================================================================
--- gcc.target/i386/pr22152.c (revision 0)
+++ gcc.target/i386/pr22152.c (revision 0)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+#include <mmintrin.h>
+
+__m64
+unsigned_add3 (const __m64 * a, const __m64 * b, unsigned long count)
+{
+ __m64 sum;
+ unsigned int i;
+
+ for (i = 1; i < count; i++)
+ sum = _mm_add_si64 (a[i], b[i]);
+
+ return sum;
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+.*%mm" 1 } } */