This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug c/68725] New: suboptimal handling of constant compound literals


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68725

            Bug ID: 68725
           Summary: suboptimal handling of constant compound literals
           Product: gcc
           Version: 5.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: rv at rasmusvillemoes dot dk
                CC: rostedt at goodmis dot org
  Target Milestone: ---

The motivation for this comes from the linux kernel's
include/trace/trace_events.h file (hence the cc Steven Rostedt), in
particular the __print_flags and __print_symbolic macros. They are
invoked multiple times with the same list of flag_array initializers,
which causes identical copies of the defined static array to be stored
in .rodata. The obvious solution, to define the array once in some .c
file and declare it extern, is rather inconvenient to try to retrofit
to the way the tracing subsystem works.

So I decided to try and see if using anonymous objects (compound
literals) would work. It turns out it did, and then it didn't. gcc is
smart enough to only emit a single copy to .rodata (at least within a
single translation unit, but that's good enough in this
case). However, at each use site, gcc decides to make a stack copy and
pass a pointer to that copy on to the trace_print_flags_seq function. This
is very inefficient, completely redundant, and since some of the
arrays are over 1k in size, unacceptable in kernel code.

The problem can be seen in the example below. For smaller arrays, gcc
doesn't seem to put a copy in .rodata; it constructs the array on the
stack with a sequence of movq instructions, which is even more
inefficient (the .text to build the array takes more space than a copy
of the array in .rodata + a memcpy would) and equally wasteful. And it
gets even worse when one looks at the g functions, where the stack use
is doubled, and the copy/construction is done twice.

For const-qualified compound literals with compile-time constant
initializers, which are not explicitly used to initialize another
object, I don't see any reason to actually construct such an
object. [Maybe as an optimization if the object only takes up a few
words, but even that is questionable.] As soon as the size is greater
than, say, 32 bytes, I think it would be much better to just refer to a
single copy in .rodata.

I've tried gcc 4.9 and 5.1, with both -O2 and -O3, and they all show the
same behaviour.

// gcc -std=gnu89 -O2 -o complit.o -c complit.c

#include <stddef.h>

/* One table entry: a flag bit mask paired with the string that names it. */
struct flag_name { unsigned long mask; const char *name; };

/*
 * Single-bit masks FLAG_0 .. FLAG_32; FLAG_n is (1UL << n).
 * NOTE: FLAG_32 shifts by 32, which is only defined when unsigned long is
 * wider than 32 bits (true for the elf64-x86-64 object shown in the
 * disassembly that accompanies this test case).
 */
#define FLAG_0 (1UL << 0)
#define FLAG_1 (1UL << 1)
#define FLAG_2 (1UL << 2)
#define FLAG_3 (1UL << 3)
#define FLAG_4 (1UL << 4)
#define FLAG_5 (1UL << 5)
#define FLAG_6 (1UL << 6)
#define FLAG_7 (1UL << 7)
#define FLAG_8 (1UL << 8)
#define FLAG_9 (1UL << 9)
#define FLAG_10 (1UL << 10)
#define FLAG_11 (1UL << 11)
#define FLAG_12 (1UL << 12)
#define FLAG_13 (1UL << 13)
#define FLAG_14 (1UL << 14)
#define FLAG_15 (1UL << 15)
#define FLAG_16 (1UL << 16)
#define FLAG_17 (1UL << 17)
#define FLAG_18 (1UL << 18)
#define FLAG_19 (1UL << 19)
#define FLAG_20 (1UL << 20)
#define FLAG_21 (1UL << 21)
#define FLAG_22 (1UL << 22)
#define FLAG_23 (1UL << 23)
#define FLAG_24 (1UL << 24)
#define FLAG_25 (1UL << 25)
#define FLAG_26 (1UL << 26)
#define FLAG_27 (1UL << 27)
#define FLAG_28 (1UL << 28)
#define FLAG_29 (1UL << 29)
#define FLAG_30 (1UL << 30)
#define FLAG_31 (1UL << 31)
#define FLAG_32 (1UL << 32)

/*
 * Expands to a struct flag_name initializer: the mask value f and, via the
 * # operator, the argument's spelling as a string (e.g. "FLAG_0"; the
 * operand of # is not macro-expanded).
 */
#define flag_pair(f) {f, #f}
/*
 * Comma-separated initializer list with one flag_pair per flag, FLAG_0
 * through FLAG_32 (33 entries). No trailing comma, so callers can append
 * further elements.
 */
#define FLAG_NAMES         \
        flag_pair(FLAG_0), \
        flag_pair(FLAG_1), \
        flag_pair(FLAG_2), \
        flag_pair(FLAG_3), \
        flag_pair(FLAG_4), \
        flag_pair(FLAG_5), \
        flag_pair(FLAG_6), \
        flag_pair(FLAG_7), \
        flag_pair(FLAG_8), \
        flag_pair(FLAG_9), \
        flag_pair(FLAG_10), \
        flag_pair(FLAG_11), \
        flag_pair(FLAG_12), \
        flag_pair(FLAG_13), \
        flag_pair(FLAG_14), \
        flag_pair(FLAG_15), \
        flag_pair(FLAG_16), \
        flag_pair(FLAG_17), \
        flag_pair(FLAG_18), \
        flag_pair(FLAG_19), \
        flag_pair(FLAG_20), \
        flag_pair(FLAG_21), \
        flag_pair(FLAG_22), \
        flag_pair(FLAG_23), \
        flag_pair(FLAG_24), \
        flag_pair(FLAG_25), \
        flag_pair(FLAG_26), \
        flag_pair(FLAG_27), \
        flag_pair(FLAG_28), \
        flag_pair(FLAG_29), \
        flag_pair(FLAG_30), \
        flag_pair(FLAG_31), \
        flag_pair(FLAG_32)

/*
 * Short variant of the initializer list: only FLAG_0 through FLAG_4
 * (5 entries), used by f2 and g2 to show the small-array case.
 */
#define FLAG_NAMES2        \
        flag_pair(FLAG_0), \
        flag_pair(FLAG_1), \
        flag_pair(FLAG_2), \
        flag_pair(FLAG_3), \
        flag_pair(FLAG_4)

/*
 * External consumer of a flag-name table. Only declared in this test case;
 * calls to it appear as relocations against print_flags in the object file.
 * NOTE(review): how it finds the end of names is not visible here --
 * presumably via the {-1UL, NULL} entry the callers append; confirm.
 */
void print_flags(const char *s, unsigned long flags, const struct flag_name
*names);

/*
 * Large-array case, single use: passes print_flags a const compound literal
 * array built from FLAG_NAMES plus a trailing {-1UL, NULL} entry
 * (34 elements in total).
 */
void f(unsigned long flags)
{
        print_flags("foo", flags, (const struct flag_name[]){ FLAG_NAMES,
{-1UL, NULL}});
}

/*
 * Large-array case, two uses: calls print_flags twice with textually
 * identical const compound literals (FLAG_NAMES plus a trailing
 * {-1UL, NULL} entry), masking flags between the two calls.
 */
void g(unsigned long flags)
{
        print_flags("bar", flags, (const struct flag_name[]){ FLAG_NAMES,
{-1UL, NULL}});
        /* Keep only the low three bits for the second call. */
        flags &= 0x07;
        print_flags("baz", flags, (const struct flag_name[]){ FLAG_NAMES,
{-1UL, NULL}});
}

/*
 * Small-array case, single use: passes print_flags a const compound literal
 * array built from FLAG_NAMES2 plus a trailing {-1UL, NULL} entry
 * (6 elements in total).
 */
void f2(unsigned long flags)
{
        print_flags("foo", flags, (const struct flag_name[]){ FLAG_NAMES2,
{-1UL, NULL}});
}

/*
 * Small-array case, two uses: calls print_flags twice with textually
 * identical const compound literals (FLAG_NAMES2 plus a trailing
 * {-1UL, NULL} entry), masking flags between the two calls.
 */
void g2(unsigned long flags)
{
        print_flags("bar", flags, (const struct flag_name[]){ FLAG_NAMES2,
{-1UL, NULL}});
        /* Keep only the low three bits for the second call. */
        flags &= 0x07;
        print_flags("baz", flags, (const struct flag_name[]){ FLAG_NAMES2,
{-1UL, NULL}});
}

objdump output:

complit.o:     file format elf64-x86-64


Disassembly of section .text:

0000000000000000 <f>:
   0:   48 81 ec 28 02 00 00                    sub    $0x228,%rsp
   7:   49 89 f8                                mov    %rdi,%r8
   a:   be 00 00 00 00                          mov    $0x0,%esi
                        b: R_X86_64_32  .rodata
   f:   48 89 e7                                mov    %rsp,%rdi
  12:   48 89 e2                                mov    %rsp,%rdx
  15:   b9 44 00 00 00                          mov    $0x44,%ecx
  1a:   f3 48 a5                                rep movsq %ds:(%rsi),%es:(%rdi)
  1d:   4c 89 c6                                mov    %r8,%rsi
  20:   bf 00 00 00 00                          mov    $0x0,%edi
                        21: R_X86_64_32 .rodata.str1.1
  25:   e8 00 00 00 00                          callq  2a <f+0x2a>
                        26: R_X86_64_PC32       print_flags-0x4
  2a:   48 81 c4 28 02 00 00                    add    $0x228,%rsp
  31:   c3                                      retq   
  32:   66 66 66 66 66 2e 0f 1f 84 00 00 00     data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
  3e:   00 00 

0000000000000040 <g>:
  40:   53                                      push   %rbx
  41:   48 89 fb                                mov    %rdi,%rbx
  44:   be 00 00 00 00                          mov    $0x0,%esi
                        45: R_X86_64_32 .rodata
  49:   b9 44 00 00 00                          mov    $0x44,%ecx
  4e:   48 81 ec 40 04 00 00                    sub    $0x440,%rsp
  55:   48 89 e2                                mov    %rsp,%rdx
  58:   48 89 e7                                mov    %rsp,%rdi
  5b:   f3 48 a5                                rep movsq %ds:(%rsi),%es:(%rdi)
  5e:   48 89 de                                mov    %rbx,%rsi
  61:   bf 00 00 00 00                          mov    $0x0,%edi
                        62: R_X86_64_32 .rodata.str1.1+0x102
  66:   83 e3 07                                and    $0x7,%ebx
  69:   e8 00 00 00 00                          callq  6e <g+0x2e>
                        6a: R_X86_64_PC32       print_flags-0x4
  6e:   48 8d 94 24 20 02 00 00                 lea    0x220(%rsp),%rdx
  76:   48 8d bc 24 20 02 00 00                 lea    0x220(%rsp),%rdi
  7e:   be 00 00 00 00                          mov    $0x0,%esi
                        7f: R_X86_64_32 .rodata
  83:   b9 44 00 00 00                          mov    $0x44,%ecx
  88:   f3 48 a5                                rep movsq %ds:(%rsi),%es:(%rdi)
  8b:   48 89 de                                mov    %rbx,%rsi
  8e:   bf 00 00 00 00                          mov    $0x0,%edi
                        8f: R_X86_64_32 .rodata.str1.1+0x106
  93:   e8 00 00 00 00                          callq  98 <g+0x58>
                        94: R_X86_64_PC32       print_flags-0x4
  98:   48 81 c4 40 04 00 00                    add    $0x440,%rsp
  9f:   5b                                      pop    %rbx
  a0:   c3                                      retq   
  a1:   66 66 66 66 66 66 2e 0f 1f 84 00 00     data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
  ad:   00 00 00 

00000000000000b0 <f2>:
  b0:   48 83 ec 68                             sub    $0x68,%rsp
  b4:   48 89 fe                                mov    %rdi,%rsi
  b7:   bf 00 00 00 00                          mov    $0x0,%edi
                        b8: R_X86_64_32 .rodata.str1.1
  bc:   48 89 e2                                mov    %rsp,%rdx
  bf:   48 c7 04 24 01 00 00 00                 movq   $0x1,(%rsp)
  c7:   48 c7 44 24 08 00 00 00 00              movq   $0x0,0x8(%rsp)
                        cc: R_X86_64_32S        .rodata.str1.1+0x4
  d0:   48 c7 44 24 10 02 00 00 00              movq   $0x2,0x10(%rsp)
  d9:   48 c7 44 24 18 00 00 00 00              movq   $0x0,0x18(%rsp)
                        de: R_X86_64_32S        .rodata.str1.1+0xb
  e2:   48 c7 44 24 20 04 00 00 00              movq   $0x4,0x20(%rsp)
  eb:   48 c7 44 24 28 00 00 00 00              movq   $0x0,0x28(%rsp)
                        f0: R_X86_64_32S        .rodata.str1.1+0x12
  f4:   48 c7 44 24 30 08 00 00 00              movq   $0x8,0x30(%rsp)
  fd:   48 c7 44 24 38 00 00 00 00              movq   $0x0,0x38(%rsp)
                        102: R_X86_64_32S       .rodata.str1.1+0x19
 106:   48 c7 44 24 40 10 00 00 00              movq   $0x10,0x40(%rsp)
 10f:   48 c7 44 24 48 00 00 00 00              movq   $0x0,0x48(%rsp)
                        114: R_X86_64_32S       .rodata.str1.1+0x20
 118:   48 c7 44 24 50 ff ff ff ff              movq   $0xffffffffffffffff,0x50(%rsp)
 121:   48 c7 44 24 58 00 00 00 00              movq   $0x0,0x58(%rsp)
 12a:   e8 00 00 00 00                          callq  12f <f2+0x7f>
                        12b: R_X86_64_PC32      print_flags-0x4
 12f:   48 83 c4 68                             add    $0x68,%rsp
 133:   c3                                      retq   
 134:   66 66 66 2e 0f 1f 84 00 00 00 00 00     data16 data16 nopw %cs:0x0(%rax,%rax,1)

0000000000000140 <g2>:
 140:   55                                      push   %rbp
 141:   53                                      push   %rbx
 142:   48 c7 c5 ff ff ff ff                    mov    $0xffffffffffffffff,%rbp
 149:   48 89 fe                                mov    %rdi,%rsi
 14c:   48 89 fb                                mov    %rdi,%rbx
 14f:   bf 00 00 00 00                          mov    $0x0,%edi
                        150: R_X86_64_32        .rodata.str1.1+0x102
 154:   48 81 ec c8 00 00 00                    sub    $0xc8,%rsp
 15b:   83 e3 07                                and    $0x7,%ebx
 15e:   48 89 e2                                mov    %rsp,%rdx
 161:   48 89 6c 24 50                          mov    %rbp,0x50(%rsp)
 166:   48 c7 04 24 01 00 00 00                 movq   $0x1,(%rsp)
 16e:   48 c7 44 24 08 00 00 00 00              movq   $0x0,0x8(%rsp)
                        173: R_X86_64_32S       .rodata.str1.1+0x4
 177:   48 c7 44 24 10 02 00 00 00              movq   $0x2,0x10(%rsp)
 180:   48 c7 44 24 18 00 00 00 00              movq   $0x0,0x18(%rsp)
                        185: R_X86_64_32S       .rodata.str1.1+0xb
 189:   48 c7 44 24 20 04 00 00 00              movq   $0x4,0x20(%rsp)
 192:   48 c7 44 24 28 00 00 00 00              movq   $0x0,0x28(%rsp)
                        197: R_X86_64_32S       .rodata.str1.1+0x12
 19b:   48 c7 44 24 30 08 00 00 00              movq   $0x8,0x30(%rsp)
 1a4:   48 c7 44 24 38 00 00 00 00              movq   $0x0,0x38(%rsp)
                        1a9: R_X86_64_32S       .rodata.str1.1+0x19
 1ad:   48 c7 44 24 40 10 00 00 00              movq   $0x10,0x40(%rsp)
 1b6:   48 c7 44 24 48 00 00 00 00              movq   $0x0,0x48(%rsp)
                        1bb: R_X86_64_32S       .rodata.str1.1+0x20
 1bf:   48 c7 44 24 58 00 00 00 00              movq   $0x0,0x58(%rsp)
 1c8:   e8 00 00 00 00                          callq  1cd <g2+0x8d>
                        1c9: R_X86_64_PC32      print_flags-0x4
 1cd:   48 8d 54 24 60                          lea    0x60(%rsp),%rdx
 1d2:   48 89 de                                mov    %rbx,%rsi
 1d5:   bf 00 00 00 00                          mov    $0x0,%edi
                        1d6: R_X86_64_32        .rodata.str1.1+0x106
 1da:   48 89 ac 24 b0 00 00 00                 mov    %rbp,0xb0(%rsp)
 1e2:   48 c7 44 24 60 01 00 00 00              movq   $0x1,0x60(%rsp)
 1eb:   48 c7 44 24 68 00 00 00 00              movq   $0x0,0x68(%rsp)
                        1f0: R_X86_64_32S       .rodata.str1.1+0x4
 1f4:   48 c7 44 24 70 02 00 00 00              movq   $0x2,0x70(%rsp)
 1fd:   48 c7 44 24 78 00 00 00 00              movq   $0x0,0x78(%rsp)
                        202: R_X86_64_32S       .rodata.str1.1+0xb
 206:   48 c7 84 24 80 00 00 00 04 00 00 00     movq   $0x4,0x80(%rsp)
 212:   48 c7 84 24 88 00 00 00 00 00 00 00     movq   $0x0,0x88(%rsp)
                        21a: R_X86_64_32S       .rodata.str1.1+0x12
 21e:   48 c7 84 24 90 00 00 00 08 00 00 00     movq   $0x8,0x90(%rsp)
 22a:   48 c7 84 24 98 00 00 00 00 00 00 00     movq   $0x0,0x98(%rsp)
                        232: R_X86_64_32S       .rodata.str1.1+0x19
 236:   48 c7 84 24 a0 00 00 00 10 00 00 00     movq   $0x10,0xa0(%rsp)
 242:   48 c7 84 24 a8 00 00 00 00 00 00 00     movq   $0x0,0xa8(%rsp)
                        24a: R_X86_64_32S       .rodata.str1.1+0x20
 24e:   48 c7 84 24 b8 00 00 00 00 00 00 00     movq   $0x0,0xb8(%rsp)
 25a:   e8 00 00 00 00                          callq  25f <g2+0x11f>
                        25b: R_X86_64_PC32      print_flags-0x4
 25f:   48 81 c4 c8 00 00 00                    add    $0xc8,%rsp
 266:   5b                                      pop    %rbx
 267:   5d                                      pop    %rbp
 268:   c3                                      retq

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]