This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[Bug rtl-optimization/56511] New: memcpy misses chance to use AVX instructions

From: "jyasskin at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
To: gcc-bugs at gcc dot gnu dot org
Date: Sun, 03 Mar 2013 06:14:02 +0000
Subject: [Bug rtl-optimization/56511] New: memcpy misses chance to use AVX instructions
Auto-submitted: auto-generated

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56511

             Bug #: 56511
           Summary: memcpy misses chance to use AVX instructions
    Classification: Unclassified
           Product: gcc
           Version: 4.7.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: jyasskin@gcc.gnu.org


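When operating on sufficiently aligned storage, memcpy should be able to use
vector instructions. In the example below, the plain vector assignment compiles
to a single 32-byte vmovaps load/store pair, but both memcpy calls of the same
size are expanded into four 8-byte movq load/store pairs instead.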

$ cat test.c
#include <string.h>

typedef float vec __attribute__((vector_size(32)));
typedef struct S {
  vec v;
  char __attribute__((aligned(__alignof__(vec)))) c[sizeof(vec)];
} S;
void assign_vec(S* s, const vec* v) { s->v = *v; }
void memcpy_vec(S* s, const vec* v) { memcpy(&s->v, v, sizeof(vec)); }
void memcpy_char(S* s, const vec* v) { memcpy(s->c, v, sizeof(vec)); }

$ gcc -mavx -S test.c -O2  -Wall -o - 
        .file   "test.c"
        .text
        .p2align 4,,15
        .globl  assign_vec
        .type   assign_vec, @function
assign_vec:
.LFB12:
        .cfi_startproc
        vmovaps (%rsi), %ymm0
        vmovaps %ymm0, (%rdi)
        vzeroupper
        ret
        .cfi_endproc
.LFE12:
        .size   assign_vec, .-assign_vec
        .p2align 4,,15
        .globl  memcpy_vec
        .type   memcpy_vec, @function
memcpy_vec:
.LFB13:
        .cfi_startproc
        movq    (%rsi), %rax
        movq    %rax, (%rdi)
        movq    8(%rsi), %rax
        movq    %rax, 8(%rdi)
        movq    16(%rsi), %rax
        movq    %rax, 16(%rdi)
        movq    24(%rsi), %rax
        movq    %rax, 24(%rdi)
        ret
        .cfi_endproc
.LFE13:
        .size   memcpy_vec, .-memcpy_vec
        .p2align 4,,15
        .globl  memcpy_char
        .type   memcpy_char, @function
memcpy_char:
.LFB14:
        .cfi_startproc
        movq    (%rsi), %rdx
        movq    %rdx, 32(%rdi)
        movq    8(%rsi), %rdx
        movq    %rdx, 40(%rdi)
        movq    16(%rsi), %rdx
        movq    %rdx, 48(%rdi)
        movq    24(%rsi), %rdx
        movq    %rdx, 56(%rdi)
        ret
        .cfi_endproc
.LFE14:
        .size   memcpy_char, .-memcpy_char


I don't have a GCC 4.8 build available to test with, but I believe it also
misses this optimization.

Follow-Ups:
- [Bug rtl-optimization/56511] memcpy misses chance to use AVX instructions
  - From: jyasskin at gcc dot gnu.org
- [Bug target/56511] memcpy misses chance to use AVX instructions
  - From: pinskia at gcc dot gnu.org
- [Bug target/56511] memcpy misses chance to use AVX instructions
  - From: izamyatin at gmail dot com
- [Bug target/56511] memcpy misses chance to use AVX instructions
  - From: izamyatin at gmail dot com

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]