This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Add leafify function attribute
- From: Richard Guenther <rguenth at tat dot physik dot uni-tuebingen dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 4 May 2004 19:28:56 +0200 (CEST)
- Subject: [PATCH] Add leafify function attribute
This patch adds the __attribute__((leafify)) function attribute that was
(in conjunction with inlining in general) much discussed last year. For
expression template code like POOMA this gains about 30% performance
improvements on ia32 for my applications, for ia64 the margin is more
like 50%. Compile time impact on leafified functions is non-negligible
(goes up to 150% for my applications) and is attributed to loop
optimization passes.
Bootstrapped and regtested (including the two new tests) on
i686-pc-linux-gnu for c and c++.
The patch (or previous forms of it) has also been in use in my local trees
for nearly two years. tree-ssa currently miscompiles my performance
testcase with leafify enabled, but 3.5 is fine.
Ok? (I don't have CVS write access)
Richard.
2004-02-05 Richard Guenther <richard.guenther@uni-tuebingen.de>
* c-common.c (handle_leafify_attribute): New.
(struct c_common_attributes): Add leafify.
* cgraphunit.c (cgraph_find_cycles): New.
(cgraph_leafify_node): New.
(cgraph_decide_inlining): Use them to handle leafify
attribute.
* doc/extend.texi: Document leafify function attribute.
* testsuite/gcc.dg/attr-leafify-1.c: New test.
* testsuite/gcc.dg/attr-leafify-2.c: New test.
? gcc/testsuite/gcc.dg/attr-leafify-1.c
? gcc/testsuite/gcc.dg/attr-leafify-2.c
Index: gcc/c-common.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-common.c,v
retrieving revision 1.496
diff -u -c -3 -p -r1.496 c-common.c
*** gcc/c-common.c 23 Apr 2004 22:50:07 -0000 1.496
--- gcc/c-common.c 2 May 2004 19:53:41 -0000
*************** static tree handle_noreturn_attribute (t
*** 740,745 ****
--- 740,746 ----
static tree handle_noinline_attribute (tree *, tree, tree, int, bool *);
static tree handle_always_inline_attribute (tree *, tree, tree, int,
bool *);
+ static tree handle_leafify_attribute (tree *, tree, tree, int, bool *);
static tree handle_used_attribute (tree *, tree, tree, int, bool *);
static tree handle_unused_attribute (tree *, tree, tree, int, bool *);
static tree handle_const_attribute (tree *, tree, tree, int, bool *);
*************** const struct attribute_spec c_common_att
*** 801,806 ****
--- 802,809 ----
handle_noinline_attribute },
{ "always_inline", 0, 0, true, false, false,
handle_always_inline_attribute },
+ { "leafify", 0, 0, true, false, false,
+ handle_leafify_attribute },
{ "used", 0, 0, true, false, false,
handle_used_attribute },
{ "unused", 0, 0, false, false, false,
*************** handle_always_inline_attribute (tree *no
*** 4373,4378 ****
--- 4376,4404 ----
return NULL_TREE;
}
+
+ /* Handle a "leafify" attribute; arguments as in struct attribute_spec.handler.
+ Only FUNCTION_DECL nodes are accepted; the handler always returns NULL_TREE. */
+
+ static tree
+ handle_leafify_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+ {
+ if (TREE_CODE (*node) == FUNCTION_DECL)
+ {
+ /* Nothing more to do: the attribute is simply recorded on the
+ declaration and is found again later with lookup_attribute. */
+ }
+ else
+ {
+ warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
+ *no_add_attrs = true; /* Not a function declaration: ask for the attribute not to be added. */
+ }
+
+ return NULL_TREE;
+ }
+
/* Handle a "used" attribute; arguments as in
struct attribute_spec.handler. */
Index: gcc/cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.58
diff -u -c -3 -p -r1.58 cgraphunit.c
*** gcc/cgraphunit.c 30 Apr 2004 16:27:16 -0000 1.58
--- gcc/cgraphunit.c 2 May 2004 19:53:44 -0000
*************** cgraph_decide_inlining_of_small_function
*** 1347,1352 ****
--- 1347,1413 ----
free (heap_node);
}
+ /* Walk the callees of NODE depth-first and record in CYCLES every node that is
+ reached again while it is still on the current walk path, i.e. every node that
+ closes a cycle.  CYCLES can then be used to avoid walking the cycles.  Uses the
+ ->aux field of the cgraph nodes, which must be zero before and is zero after. */
+
+ static void
+ cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
+ {
+ struct cgraph_edge *e;
+
+ if (node->aux) /* NODE is already on the current walk path: it closes a cycle. */
+ {
+ void **slot;
+ slot = htab_find_slot (cycles, node, INSERT);
+ if (!*slot) /* First time this node is seen closing a cycle. */
+ {
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, "Cycle contains %s\n", cgraph_node_name (node));
+ *slot = node;
+ }
+ return;
+ }
+
+ node->aux = node; /* Mark NODE as being on the current walk path. */
+ for (e = node->callees; e; e = e->next_callee)
+ {
+ cgraph_find_cycles (e->callee, cycles);
+ }
+ node->aux = 0; /* Unmark on the way out.  NOTE(review): nothing records finished nodes, so a node reachable along several paths is walked again for each path. */
+ }
+
+ /* Leafify NODE: walk its callee edges, mark every edge that passes the checks
+ below as inlined and recurse into that callee.  Callees recorded in CYCLES
+ (as filled in by cgraph_find_cycles) are never entered, which is how we avoid
+ running endlessly around cycles of the callgraph. */
+
+ static void
+ cgraph_leafify_node (struct cgraph_node *node, htab_t cycles)
+ {
+ struct cgraph_edge *e;
+
+ for (e = node->callees; e; e = e->next_callee)
+ {
+ /* Inline the call, if possible, and recurse.  Callees found in CYCLES
+ are skipped so that we do not enter callgraph cycles here. */
+ if (e->inline_failed /* NOTE(review): presumably set while the edge is not yet inlined; confirm. */
+ && e->callee->local.inlinable
+ && !cgraph_recursive_inlining_p (node, e->callee,
+ &e->inline_failed)
+ && !htab_find (cycles, e->callee))
+ {
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, " inlining %s", cgraph_node_name (e->callee));
+ cgraph_mark_inline_edge (e);
+ cgraph_leafify_node (e->callee, cycles);
+ }
+ else if (cgraph_dump_file) /* Edge left alone; only note that in the dump file. */
+ fprintf (cgraph_dump_file, " !inlining %s", cgraph_node_name (e->callee));
+ }
+ }
+
/* Decide on the inlining. We do so in the topological order to avoid
expenses on updating data structures. */
*************** cgraph_decide_inlining (void)
*** 1384,1389 ****
--- 1445,1468 ----
struct cgraph_edge *e;
node = order[i];
+
+ /* Handle nodes to be leafified, but don't update overall unit size. */
+ if (lookup_attribute ("leafify", DECL_ATTRIBUTES (node->decl)) != NULL)
+ {
+ int old_overall_insns = overall_insns;
+ htab_t cycles;
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file,
+ "Leafifying %s\n", cgraph_node_name (node));
+ cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
+ cgraph_find_cycles (node, cycles);
+ cgraph_leafify_node (node, cycles);
+ htab_delete (cycles);
+ overall_insns = old_overall_insns;
+ /* We don't need to consider always_inline functions inside the leafified
+ function anymore. */
+ continue;
+ }
for (e = node->callees; e; e = e->next_callee)
if (e->callee->local.disregard_inline_limits)
Index: gcc/doc/extend.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/extend.texi,v
retrieving revision 1.192
diff -u -c -3 -p -r1.192 extend.texi
*** gcc/doc/extend.texi 18 Apr 2004 22:01:15 -0000 1.192
--- gcc/doc/extend.texi 2 May 2004 19:54:14 -0000
*************** The keyword @code{__attribute__} allows
*** 1893,1899 ****
attributes when making a declaration. This keyword is followed by an
attribute specification inside double parentheses. The following
attributes are currently defined for functions on all targets:
! @code{noreturn}, @code{noinline}, @code{always_inline},
@code{pure}, @code{const}, @code{nothrow},
@code{format}, @code{format_arg}, @code{no_instrument_function},
@code{section}, @code{constructor}, @code{destructor}, @code{used},
--- 1893,1899 ----
attributes when making a declaration. This keyword is followed by an
attribute specification inside double parentheses. The following
attributes are currently defined for functions on all targets:
! @code{noreturn}, @code{noinline}, @code{always_inline}, @code{leafify},
@code{pure}, @code{const}, @code{nothrow},
@code{format}, @code{format_arg}, @code{no_instrument_function},
@code{section}, @code{constructor}, @code{destructor}, @code{used},
*************** inlining.
*** 1969,1974 ****
--- 1969,1982 ----
Generally, functions are not inlined unless optimization is specified.
For functions declared inline, this attribute inlines the function even
if no optimization level was specified.
+
+ @cindex @code{leafify} function attribute
+ @item leafify
+ Generally, inlining into a function is limited. For a function marked with
+ this attribute, every call inside this function will be inlined, if possible.
+ Whether the function itself is considered for inlining depends on its size and
+ the current inlining parameters. The @code{leafify} attribute only works
+ reliably in unit-at-a-time mode.
@cindex @code{pure} function attribute
@item pure
/* Test leafify attribute, which forces inlining of all functions
down the callgraph in unit-at-a-time mode. */
/* Origin: Richard Guenther <richard.guenther@uni-tuebingen.de>. */
/* { dg-do compile } */
/* { dg-options "-funit-at-a-time -finline" } */
static int foobar(int i);
static int bar(int i);
/* Innermost helper: returns I - 1.  It is called by bar and also directly
   by some of the leafified functions in this test.  */
static int foobar(int i)
{
return i-1;
}
/* Helper one level up: returns I + foobar (I), so a fully inlined copy of
   bar also needs foobar inlined into it.  */
static int bar(int i)
{
return i + foobar(i);
}
/* Leafified caller that calls bar only; foobar is reached indirectly
   through bar.  */
int __attribute__((leafify)) leaf0a(int i)
{
return bar(i);
}
/* Leafified caller that calls foobar directly.  */
int __attribute__((leafify)) leaf0b(int i)
{
return foobar(i);
}
/* Leafified caller where the result of foobar is passed straight in as the
   argument of bar (nested call expression).  */
int __attribute__((leafify)) leaf1(int i)
{
return bar(foobar(i));
}
/* Leafified caller making the same two calls as a nested bar (foobar (I)),
   but with the foobar result stored in the local J first.  */
int __attribute__((leafify)) leaf2(int i)
{
int j;
j = foobar(i);
return bar(j);
}
/* Leafified caller with two separate calls to bar on the same argument;
   the results are summed in J.  */
int __attribute__((leafify)) leaf3(int i)
{
int j;
j = bar(i);
j += bar(i);
return j;
}
/* { dg-final { scan-assembler-not "foobar" } } */
/* { dg-final { scan-assembler-not "bar" } } */
/* Test leafify attribute corner cases where we need to avoid inlining. */
/* Origin: Richard Guenther <richard.guenther@uni-tuebingen.de>. */
/* { dg-do compile } */
/* { dg-options "-funit-at-a-time -finline" } */
/* Direct recursion: the leafified function calls itself.  */
void __attribute__((leafify)) direct(void)
{
direct();
}
/* Indirect recursion through one helper.  The leafified function is itself
   part of the cycle: indirect -> indirect1 -> indirect.  */
void __attribute__((leafify)) indirect(void);
static void indirect1(void)
{
indirect();
}
void __attribute__((leafify)) indirect(void)
{
indirect1();
}
/* Indirect recursion through two helpers.  The leafified function is itself
   part of the cycle:
   doubleindirect -> doubleindirect1 -> doubleindirect2 -> doubleindirect.  */
void __attribute__((leafify)) doubleindirect(void);
static void doubleindirect2(void)
{
doubleindirect();
}
static void doubleindirect1(void)
{
doubleindirect2();
}
void __attribute__((leafify)) doubleindirect(void)
{
doubleindirect1();
}
/* Cycle below the leafified function.  subcycle is not part of the cycle
   itself; it only calls into the two-function cycle
   subcycle1 -> subcycle2 -> subcycle1.  */
static void subcycle1(void);
static void subcycle2(void)
{
subcycle1();
}
static void subcycle1(void)
{
subcycle2();
}
void __attribute__((leafify)) subcycle(void)
{
subcycle1();
}
/* Longer cycle below the leafified function.  doublesubcycle is not part of
   the cycle itself; it only calls into the three-function cycle
   doublesubcycle1 -> doublesubcycle2 -> doublesubcycle3 -> doublesubcycle1.  */
static void doublesubcycle1(void);
static void doublesubcycle2(void);
static void doublesubcycle3(void)
{
doublesubcycle1();
}
static void doublesubcycle2(void)
{
doublesubcycle3();
}
static void doublesubcycle1(void)
{
doublesubcycle2();
}
void __attribute__((leafify)) doublesubcycle(void)
{
doublesubcycle1();
}