This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Fix PR target/30970: xmm register zeroing by pxor N,N should be moved out of loop


Hello!

Currently, the following testcase:

--cut here--
#define N 256
int b[N];

void test()
{ int i;


 for (i = 0; i < N; i++)
   b[i] = 0;
}
--cut here--

compiles to (-O2 -msse2 -ftree-vectorize):

test:
       movl    $16, %eax
       pxor    %xmm0, %xmm0
       movdqa  %xmm0, b
       .p2align 4,,7
.L2:
       pxor    %xmm0, %xmm0
       movdqa  %xmm0, b(%eax)
       addl    $16, %eax
       cmpl    $1024, %eax
       jne     .L2
       rep ; ret

Please note the pxor inside the loop.

Attached patch fixes this by disabling expansion of move patterns for invalid operand combinations (i.e. move of vector 0 into memory) to enable various subsequent (g)cse optimizations. Resulting ASM is then:

test:
       movl    $16, %eax
       pxor    %xmm0, %xmm0
       movdqa  %xmm0, b
       .p2align 4,,7
.L2:
       movdqa  %xmm0, b(%eax)
       addl    $16, %eax
       cmpl    $1024, %eax
       jne     .L2
       rep ; ret

There are some problems with the subreg lowering patch. For some reason, this pass splits the pxor into two 64-bit zero loads. The resulting code with patched gcc on the x86_64 target is:

test:
.LFB2:
       xorl    %eax, %eax
       xorl    %edx, %edx
       movl    $16, %ecx
       movq    %rax, -24(%rsp)
       movq    %rdx, -16(%rsp)
       movdqa  -24(%rsp), %xmm0
       movdqa  %xmm0, b(%rip)
       .p2align 4,,7
.L2:
       movq    %rax, -40(%rsp)
       movq    %rdx, -32(%rsp)
       movdqa  -40(%rsp), %xmm0
       movdqa  %xmm0, b(%rcx)
       addq    $16, %rcx
       cmpq    $1024, %rcx
       jne     .L2
       rep ; ret

However, using -fno-split-wide-types, gcc generates expected code:

test:
.LFB2:
       pxor    %xmm0, %xmm0
       movl    $16, %eax
       movdqa  %xmm0, b(%rip)
       .p2align 4,,7
.L2:
       movdqa  %xmm0, b(%rax)
       addq    $16, %rax
       cmpq    $1024, %rax
       jne     .L2
       rep ; ret

2007-02-26 Uros Bizjak <ubizjak@gmail.com>

   PR target/30970
   * config/i386/sse.md (*mov<mode>_internal, *movv4sf_internal,
   *movv2df_internal): Enable pattern only for valid operand
   combinations.

testsuite/ChangeLog:

2007-02-26 Uros Bizjak <ubizjak@gmail.com>

   PR target/30970
   * gcc.target/i386/gfortran.dg/pr30970.c: New test.

Patch is bootstrapped on i686-pc-linux-gnu and x86_64-pc-linux-gnu, regression tested for c, c++ and gfortran. As this patch exposes an (although harmless) regression in subreg lowering, I'd like to ask Ian whether this patch should be committed to SVN anyway.

Uros.
Index: testsuite/gcc.target/i386/pr30970.c
===================================================================
--- testsuite/gcc.target/i386/pr30970.c	(revision 0)
+++ testsuite/gcc.target/i386/pr30970.c	(revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-msse2 -O2 -ftree-vectorize" } */
+
+#define N 256
+int b[N];
+
+void test()
+{  
+  int i;
+
+  for (i = 0; i < N; i++)
+    b[i] = 0;
+}
+
+/* { dg-final { scan-assembler-times "pxor" 1 } } */
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 122336)
+++ config/i386/sse.md	(working copy)
@@ -60,7 +60,9 @@
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
 	(match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
-  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_SSE
+   && !(MEM_P (operands[0])
+	&& (MEM_P (operands[1]) || standard_sse_constant_p (operands[1]) > 0))"
 {
   switch (which_alternative)
     {
@@ -140,7 +142,9 @@
 (define_insn "*movv4sf_internal"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
 	(match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
-  "TARGET_SSE"
+  "TARGET_SSE
+   && !(MEM_P (operands[0])
+	&& (MEM_P (operands[1]) || standard_sse_constant_p (operands[1]) > 0))"
 {
   switch (which_alternative)
     {
@@ -182,7 +186,9 @@
 (define_insn "*movv2df_internal"
   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
 	(match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
-  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_SSE
+   && !(MEM_P (operands[0])
+	&& (MEM_P (operands[1]) || standard_sse_constant_p (operands[1]) > 0))"
 {
   switch (which_alternative)
     {

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]