This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug c/42367] New: long vector operation causes gcc to copy arguments


Why does GCC copy the vector operands to and from the stack?
Am I doing something wrong?

===
Compiler:
===
gcc -v
Using built-in specs.
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man
--infodir=/usr/share/info --enable-shared --enable-threads=posix
--enable-checking=release --with-system-zlib --enable-__cxa_atexit
--disable-libunwind-exceptions --enable-libgcj-multifile
--enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk
--disable-dssi --enable-plugin
--with-java-home=/usr/lib/jvm/java-1.4.2-gcj-1.4.2.0/jre --with-cpu=generic
--host=x86_64-redhat-linux
Thread model: posix
gcc version 4.1.2 20080704 (Red Hat 4.1.2-46)



===
Source a.c:
===
/* GCC vector type with int elements and a total size of 512 bytes. */
typedef int BLOCK512 __attribute__((__vector_size__(512)));

/* Store the element-wise XOR of the vectors *s0 and *s1 into *d. */
void f (BLOCK512 *d, const BLOCK512 *s0, const BLOCK512 *s1) {
    *d = *s0 ^ *s1;
}



===
Command:
===
gcc -O3 a.c -c -o a.o


===
Result (note 3 calls to memcpy):
===
Disassembly of section .text:

0000000000000000 <f>:
   0:   41 54                   push   %r12
   2:   49 89 fc                mov    %rdi,%r12
   5:   53                      push   %rbx
   6:   48 89 d3                mov    %rdx,%rbx
   9:   ba 00 02 00 00          mov    $0x200,%edx
   e:   48 81 ec 08 06 00 00    sub    $0x608,%rsp
  15:   48 8d bc 24 00 02 00    lea    0x200(%rsp),%rdi
  1c:   00 
  1d:   e8 00 00 00 00          callq  22 <f+0x22>
                        1e: R_X86_64_PC32       memcpy+0xfffffffffffffffc
  22:   48 8d bc 24 00 04 00    lea    0x400(%rsp),%rdi
  29:   00 
  2a:   48 89 de                mov    %rbx,%rsi
  2d:   ba 00 02 00 00          mov    $0x200,%edx
  32:   e8 00 00 00 00          callq  37 <f+0x37>
                        33: R_X86_64_PC32       memcpy+0xfffffffffffffffc
  37:   66 0f 6f 84 24 00 04    movdqa 0x400(%rsp),%xmm0
  3e:   00 00 
  40:   48 89 e6                mov    %rsp,%rsi
  43:   4c 89 e7                mov    %r12,%rdi
  46:   ba 00 02 00 00          mov    $0x200,%edx
  4b:   66 0f ef 84 24 00 02    pxor   0x200(%rsp),%xmm0
  52:   00 00 
  54:   66 0f 7f 04 24          movdqa %xmm0,(%rsp)
  59:   66 0f 6f 84 24 10 04    movdqa 0x410(%rsp),%xmm0
  60:   00 00 
  62:   66 0f ef 84 24 10 02    pxor   0x210(%rsp),%xmm0
  69:   00 00 
  6b:   66 0f 7f 44 24 10       movdqa %xmm0,0x10(%rsp)
  71:   66 0f 6f 84 24 20 04    movdqa 0x420(%rsp),%xmm0
  78:   00 00 
  7a:   66 0f ef 84 24 20 02    pxor   0x220(%rsp),%xmm0
  81:   00 00 
  83:   66 0f 7f 44 24 20       movdqa %xmm0,0x20(%rsp)
  89:   66 0f 6f 84 24 30 04    movdqa 0x430(%rsp),%xmm0
  90:   00 00 
  92:   66 0f ef 84 24 30 02    pxor   0x230(%rsp),%xmm0
  99:   00 00 
  9b:   66 0f 7f 44 24 30       movdqa %xmm0,0x30(%rsp)
  a1:   66 0f 6f 84 24 40 04    movdqa 0x440(%rsp),%xmm0
  a8:   00 00 
  aa:   66 0f ef 84 24 40 02    pxor   0x240(%rsp),%xmm0
  b1:   00 00 
  b3:   66 0f 7f 44 24 40       movdqa %xmm0,0x40(%rsp)
  b9:   66 0f 6f 84 24 50 04    movdqa 0x450(%rsp),%xmm0
  c0:   00 00 
  c2:   66 0f ef 84 24 50 02    pxor   0x250(%rsp),%xmm0
  c9:   00 00 
  cb:   66 0f 7f 44 24 50       movdqa %xmm0,0x50(%rsp)
  d1:   66 0f 6f 84 24 60 04    movdqa 0x460(%rsp),%xmm0
  d8:   00 00 
  da:   66 0f ef 84 24 60 02    pxor   0x260(%rsp),%xmm0
  e1:   00 00 
  e3:   66 0f 7f 44 24 60       movdqa %xmm0,0x60(%rsp)
  e9:   66 0f 6f 84 24 70 04    movdqa 0x470(%rsp),%xmm0
  f0:   00 00 
  f2:   66 0f ef 84 24 70 02    pxor   0x270(%rsp),%xmm0
  f9:   00 00 
  fb:   66 0f 7f 44 24 70       movdqa %xmm0,0x70(%rsp)
 101:   66 0f 6f 84 24 80 04    movdqa 0x480(%rsp),%xmm0
 108:   00 00 
 10a:   66 0f ef 84 24 80 02    pxor   0x280(%rsp),%xmm0
 111:   00 00 
 113:   66 0f 7f 84 24 80 00    movdqa %xmm0,0x80(%rsp)
 11a:   00 00 
 11c:   66 0f 6f 84 24 90 04    movdqa 0x490(%rsp),%xmm0
 123:   00 00 
 125:   66 0f ef 84 24 90 02    pxor   0x290(%rsp),%xmm0
 12c:   00 00 
 12e:   66 0f 7f 84 24 90 00    movdqa %xmm0,0x90(%rsp)
 135:   00 00 
 137:   66 0f 6f 84 24 a0 04    movdqa 0x4a0(%rsp),%xmm0
 13e:   00 00 
 140:   66 0f ef 84 24 a0 02    pxor   0x2a0(%rsp),%xmm0
 147:   00 00 
 149:   66 0f 7f 84 24 a0 00    movdqa %xmm0,0xa0(%rsp)
 150:   00 00 
 152:   66 0f 6f 84 24 b0 04    movdqa 0x4b0(%rsp),%xmm0
 159:   00 00 
 15b:   66 0f ef 84 24 b0 02    pxor   0x2b0(%rsp),%xmm0
 162:   00 00 
 164:   66 0f 7f 84 24 b0 00    movdqa %xmm0,0xb0(%rsp)
 16b:   00 00 
 16d:   66 0f 6f 84 24 c0 04    movdqa 0x4c0(%rsp),%xmm0
 174:   00 00 
 176:   66 0f ef 84 24 c0 02    pxor   0x2c0(%rsp),%xmm0
 17d:   00 00 
 17f:   66 0f 7f 84 24 c0 00    movdqa %xmm0,0xc0(%rsp)
 186:   00 00 
 188:   66 0f 6f 84 24 d0 04    movdqa 0x4d0(%rsp),%xmm0
 18f:   00 00 
 191:   66 0f ef 84 24 d0 02    pxor   0x2d0(%rsp),%xmm0
 198:   00 00 
 19a:   66 0f 7f 84 24 d0 00    movdqa %xmm0,0xd0(%rsp)
 1a1:   00 00 
 1a3:   66 0f 6f 84 24 e0 04    movdqa 0x4e0(%rsp),%xmm0
 1aa:   00 00 
 1ac:   66 0f ef 84 24 e0 02    pxor   0x2e0(%rsp),%xmm0
 1b3:   00 00 
 1b5:   66 0f 7f 84 24 e0 00    movdqa %xmm0,0xe0(%rsp)
 1bc:   00 00 
 1be:   66 0f 6f 84 24 f0 04    movdqa 0x4f0(%rsp),%xmm0
 1c5:   00 00 
 1c7:   66 0f ef 84 24 f0 02    pxor   0x2f0(%rsp),%xmm0
 1ce:   00 00 
 1d0:   66 0f 7f 84 24 f0 00    movdqa %xmm0,0xf0(%rsp)
 1d7:   00 00 
 1d9:   66 0f 6f 84 24 00 05    movdqa 0x500(%rsp),%xmm0
 1e0:   00 00 
 1e2:   66 0f ef 84 24 00 03    pxor   0x300(%rsp),%xmm0
 1e9:   00 00 
 1eb:   66 0f 7f 84 24 00 01    movdqa %xmm0,0x100(%rsp)
 1f2:   00 00 
 1f4:   66 0f 6f 84 24 10 05    movdqa 0x510(%rsp),%xmm0
 1fb:   00 00 
 1fd:   66 0f ef 84 24 10 03    pxor   0x310(%rsp),%xmm0
 204:   00 00 
 206:   66 0f 7f 84 24 10 01    movdqa %xmm0,0x110(%rsp)
 20d:   00 00 
 20f:   66 0f 6f 84 24 20 05    movdqa 0x520(%rsp),%xmm0
 216:   00 00 
 218:   66 0f ef 84 24 20 03    pxor   0x320(%rsp),%xmm0
 21f:   00 00 
 221:   66 0f 7f 84 24 20 01    movdqa %xmm0,0x120(%rsp)
 228:   00 00 
 22a:   66 0f 6f 84 24 30 05    movdqa 0x530(%rsp),%xmm0
 231:   00 00 
 233:   66 0f ef 84 24 30 03    pxor   0x330(%rsp),%xmm0
 23a:   00 00 
 23c:   66 0f 7f 84 24 30 01    movdqa %xmm0,0x130(%rsp)
 243:   00 00 
 245:   66 0f 6f 84 24 40 05    movdqa 0x540(%rsp),%xmm0
 24c:   00 00 
 24e:   66 0f ef 84 24 40 03    pxor   0x340(%rsp),%xmm0
 255:   00 00 
 257:   66 0f 7f 84 24 40 01    movdqa %xmm0,0x140(%rsp)
 25e:   00 00 
 260:   66 0f 6f 84 24 50 05    movdqa 0x550(%rsp),%xmm0
 267:   00 00 
 269:   66 0f ef 84 24 50 03    pxor   0x350(%rsp),%xmm0
 270:   00 00 
 272:   66 0f 7f 84 24 50 01    movdqa %xmm0,0x150(%rsp)
 279:   00 00 
 27b:   66 0f 6f 84 24 60 05    movdqa 0x560(%rsp),%xmm0
 282:   00 00 
 284:   66 0f ef 84 24 60 03    pxor   0x360(%rsp),%xmm0
 28b:   00 00 
 28d:   66 0f 7f 84 24 60 01    movdqa %xmm0,0x160(%rsp)
 294:   00 00 
 296:   66 0f 6f 84 24 70 05    movdqa 0x570(%rsp),%xmm0
 29d:   00 00 
 29f:   66 0f ef 84 24 70 03    pxor   0x370(%rsp),%xmm0
 2a6:   00 00 
 2a8:   66 0f 7f 84 24 70 01    movdqa %xmm0,0x170(%rsp)
 2af:   00 00 
 2b1:   66 0f 6f 84 24 80 05    movdqa 0x580(%rsp),%xmm0
 2b8:   00 00 
 2ba:   66 0f ef 84 24 80 03    pxor   0x380(%rsp),%xmm0
 2c1:   00 00 
 2c3:   66 0f 7f 84 24 80 01    movdqa %xmm0,0x180(%rsp)
 2ca:   00 00 
 2cc:   66 0f 6f 84 24 90 05    movdqa 0x590(%rsp),%xmm0
 2d3:   00 00 
 2d5:   66 0f ef 84 24 90 03    pxor   0x390(%rsp),%xmm0
 2dc:   00 00 
 2de:   66 0f 7f 84 24 90 01    movdqa %xmm0,0x190(%rsp)
 2e5:   00 00 
 2e7:   66 0f 6f 84 24 a0 05    movdqa 0x5a0(%rsp),%xmm0
 2ee:   00 00 
 2f0:   66 0f ef 84 24 a0 03    pxor   0x3a0(%rsp),%xmm0
 2f7:   00 00 
 2f9:   66 0f 7f 84 24 a0 01    movdqa %xmm0,0x1a0(%rsp)
 300:   00 00 
 302:   66 0f 6f 84 24 b0 05    movdqa 0x5b0(%rsp),%xmm0
 309:   00 00 
 30b:   66 0f ef 84 24 b0 03    pxor   0x3b0(%rsp),%xmm0
 312:   00 00 
 314:   66 0f 7f 84 24 b0 01    movdqa %xmm0,0x1b0(%rsp)
 31b:   00 00 
 31d:   66 0f 6f 84 24 c0 05    movdqa 0x5c0(%rsp),%xmm0
 324:   00 00 
 326:   66 0f ef 84 24 c0 03    pxor   0x3c0(%rsp),%xmm0
 32d:   00 00 
 32f:   66 0f 7f 84 24 c0 01    movdqa %xmm0,0x1c0(%rsp)
 336:   00 00 
 338:   66 0f 6f 84 24 d0 05    movdqa 0x5d0(%rsp),%xmm0
 33f:   00 00 
 341:   66 0f ef 84 24 d0 03    pxor   0x3d0(%rsp),%xmm0
 348:   00 00 
 34a:   66 0f 7f 84 24 d0 01    movdqa %xmm0,0x1d0(%rsp)
 351:   00 00 
 353:   66 0f 6f 84 24 e0 05    movdqa 0x5e0(%rsp),%xmm0
 35a:   00 00 
 35c:   66 0f ef 84 24 e0 03    pxor   0x3e0(%rsp),%xmm0
 363:   00 00 
 365:   66 0f 7f 84 24 e0 01    movdqa %xmm0,0x1e0(%rsp)
 36c:   00 00 
 36e:   66 0f 6f 84 24 f0 05    movdqa 0x5f0(%rsp),%xmm0
 375:   00 00 
 377:   66 0f ef 84 24 f0 03    pxor   0x3f0(%rsp),%xmm0
 37e:   00 00 
 380:   66 0f 7f 84 24 f0 01    movdqa %xmm0,0x1f0(%rsp)
 387:   00 00 
 389:   e8 00 00 00 00          callq  38e <f+0x38e>
                        38a: R_X86_64_PC32      memcpy+0xfffffffffffffffc
 38e:   48 81 c4 08 06 00 00    add    $0x608,%rsp
 395:   5b                      pop    %rbx
 396:   41 5c                   pop    %r12
 398:   c3                      retq


-- 
           Summary: long vector operation causes gcc to copy arguments
           Product: gcc
           Version: 4.1.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: Shvaiger_Felix at emc dot com
 GCC build triplet: x86_64-redhat-linux
  GCC host triplet: x86_64-redhat-linux
GCC target triplet: x86_64-redhat-linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42367


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]