This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug tree-optimization/15419] memcpy pessimization


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15419

Steven Bosscher <steven at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Last reconfirmed|2009-04-22 23:01:37         |2011-05-22 17:12:37

--- Comment #3 from Steven Bosscher <steven at gcc dot gnu.org> 2011-05-22 15:13:52 UTC ---
$ cat t.c
typedef long size_t;

extern void *memcpy (void *__restrict __dest,
             __const void *__restrict __src, size_t __n)
  __attribute__ ((__nothrow__)) __attribute__ ((__nonnull__ (1, 2)));


typedef unsigned uint32_t;

uint32_t get_uint32(const void *p) {
    uint32_t w;
    memcpy(&w, p, sizeof (uint32_t));
    return w;
}

uint32_t get_uint32a(const void *p) {
    const struct { uint32_t w; } __attribute__((packed)) *wp = p;
    return wp->w;
}

$ ./cc1 -quiet -m32 -O2 t.c -fdump-tree-optimized
$ cat t.s
    .file    "t.c"
    .text
    .p2align 4,,15
    .globl    get_uint32
    .type    get_uint32, @function
get_uint32:
.LFB0:
    .cfi_startproc
    subl    $16, %esp
    .cfi_def_cfa_offset 20
    movl    20(%esp), %eax
    movl    (%eax), %eax
    addl    $16, %esp
    .cfi_def_cfa_offset 4
    ret
    .cfi_endproc
.LFE0:
    .size    get_uint32, .-get_uint32
    .p2align 4,,15
    .globl    get_uint32a
    .type    get_uint32a, @function
get_uint32a:
.LFB1:
    .cfi_startproc
    movl    4(%esp), %eax
    movl    (%eax), %eax
    ret
    .cfi_endproc
.LFE1:
    .size    get_uint32a, .-get_uint32a
    .ident    "GCC: (GNU) 4.6.0 20110312 (experimental) [trunk revision 170907]"
    .section    .note.GNU-stack,"",@progbits
$ cat t.c.143t.optimized 

;; Function get_uint32 (get_uint32)

get_uint32 (const void * p)
{
  uint32_t w;
  uint32_t D.1997;

<bb 2>:
  memcpy (&w, p_2(D), 4);
  D.1997_3 = w;
  return D.1997_3;

}



;; Function get_uint32a (get_uint32a)

get_uint32a (const void * p)
{
  uint32_t D.1994;

<bb 2>:
  D.1994_3 = MEM[(const struct  *)p_1(D)].w;
  return D.1994_3;

}


Isn't it possible to fold memcpy for selected small lengths to explicit memory
stores?

The code for x86_64 is identical for both functions.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]