This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[tree-ssa][PATCH] leafify function attribute
- From: Richard Guenther <rguenth at tat dot physik dot uni-tuebingen dot de>
- To: Jan Hubicka <jh at suse dot cz>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Thu, 5 Feb 2004 11:57:47 +0100 (CET)
- Subject: [tree-ssa][PATCH] leafify function attribute
- References: <Pine.LNX.4.53.0402041455000.9162@bellatrix.tat.physik.uni-tuebingen.de><Pine.LNX.4.53.0402041626570.9162@bellatrix.tat.physik.uni-tuebingen.de><Pine.LNX.4.53.0402041652560.9162@bellatrix.tat.physik.uni-tuebingen.de><20040204162847.GK21148@kam.mff.cuni.cz><Pine.LNX.4.53.0402041800310.9162@bellatrix.tat.physik.uni-tuebingen.de><20040204170714.GM21148@kam.mff.cuni.cz> <Pine.LNX.4.58.0402042330530.1555@goofy><20040204233811.GF21096@kam.mff.cuni.cz>
On Thu, 5 Feb 2004, Jan Hubicka wrote:
> On Wed, 4 Feb 2004, Richard Guenther wrote:
>
> > Yes, you are right. Working patch is included below for reference.
> >
> > Actually avoiding cycles in the callgraph while still inlining as leafify
> > is supposed to do was harder than I thought. The solution was to do a
> > walk over the sub-graph to be leafified to search for cycle entering nodes
> > and store these into a hash table. In the leafifying walk we can then
> > easily avoid recursing into the cycles.
>
> This sounds acceptable. In earlier incarnations of the code I used to
> have cycle discovery in cgraph_recursive_inlining_p predicate but it
> turned out to be bottleneck for very large units and inline depths.
> Simply breaking cycles this way will work.
>
> As far as cgraph code is concerned, you patch looks right to me (modulo
> the return in cgraph verifier :). You may want to add to add changelogs
> and send it to the lists. I can approve (and will :) the cgraph portion
> of code, but can not decide about the attribute itself.
This patch brings the leafify function attribute to tree-ssa using the
improved cgraph infrastructure. It's probably not the right time to merge
this (it was never...), but here it is. It'll be sitting in my tree now.
Maybe we can reach consensus on adding this (and maybe some other)
inlining hints to the compiler? I can think of at least a #pragma inline
being useful to teach the compiler to inline a particular call.
The patch survived bootstrapping on i686-pc-linux-gnu and the resulting
compiler was used successfully on a real-world application and some
testcases for corner-cases (appended after the patch).
Thanks,
Richard.
2004-02-05 Richard Guenther <richard.guenther@uni-tuebingen.de>
* c-common.c (handle_leafify_attribute): New.
(struct c_common_attributes): Add leafify.
cgraphunit.c (cgraph_find_cycles): New.
(cgraph_leafify_node): New.
(cgraph_decide_inlining): Use them to handle leafify
attribute.
extend.texi: Document leafify function attribute.
Index: gcc/c-common.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-common.c,v
retrieving revision 1.344.2.59
diff -u -c -3 -p -r1.344.2.59 c-common.c
*** gcc/c-common.c 30 Jan 2004 13:13:29 -0000 1.344.2.59
--- gcc/c-common.c 5 Feb 2004 09:42:20 -0000
*************** static tree handle_noreturn_attribute (t
*** 763,768 ****
--- 763,769 ----
static tree handle_noinline_attribute (tree *, tree, tree, int, bool *);
static tree handle_always_inline_attribute (tree *, tree, tree, int,
bool *);
+ static tree handle_leafify_attribute (tree *, tree, tree, int, bool *);
static tree handle_used_attribute (tree *, tree, tree, int, bool *);
static tree handle_unused_attribute (tree *, tree, tree, int, bool *);
static tree handle_const_attribute (tree *, tree, tree, int, bool *);
*************** const struct attribute_spec c_common_att
*** 825,830 ****
--- 826,833 ----
handle_noinline_attribute },
{ "always_inline", 0, 0, true, false, false,
handle_always_inline_attribute },
+ { "leafify", 0, 0, true, false, false,
+ handle_leafify_attribute },
{ "used", 0, 0, true, false, false,
handle_used_attribute },
{ "unused", 0, 0, false, false, false,
*************** handle_always_inline_attribute (tree *no
*** 4513,4518 ****
--- 4516,4544 ----
return NULL_TREE;
}
+
+ /* Handle a "leafify" attribute; arguments as in
+ struct attribute_spec.handler. */
+
+ static tree
+ handle_leafify_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+ {
+ if (TREE_CODE (*node) == FUNCTION_DECL)
+ {
+ /* Do nothing else, just set the attribute. We'll get at
+ it later with lookup_attribute. */
+ }
+ else
+ {
+ warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+ }
+
/* Handle a "used" attribute; arguments as in
struct attribute_spec.handler. */
Index: gcc/cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.1.4.33
diff -u -c -3 -p -r1.1.4.33 cgraphunit.c
*** gcc/cgraphunit.c 1 Feb 2004 12:42:11 -0000 1.1.4.33
--- gcc/cgraphunit.c 5 Feb 2004 09:42:20 -0000
*************** cgraph_decide_inlining_of_small_function
*** 1280,1285 ****
--- 1280,1346 ----
free (heap_node);
}
+ /* Find callgraph nodes closing a circle in the graph. The
+ resulting hashtab can be used to avoid walking the circles.
+ Uses the cgraph nodes ->aux field which needs to be zero
+ before and will be zero after operation. */
+
+ static void
+ cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
+ {
+ struct cgraph_edge *e;
+
+ if (node->aux)
+ {
+ void **slot;
+ slot = htab_find_slot (cycles, node, INSERT);
+ if (!*slot)
+ {
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, "Cycle contains %s\n", cgraph_node_name (node));
+ *slot = node;
+ }
+ return;
+ }
+
+ node->aux = node;
+ for (e = node->callees; e; e = e->next_callee)
+ {
+ cgraph_find_cycles (e->callee, cycles);
+ }
+ node->aux = 0;
+ }
+
+ /* Leafify the cgraph node. We have to be careful in recursing
+ as to not run endlessly in circles of the callgraph.
+ We do so by using a hashtab of cycle entering nodes as generated
+ by cgraph_find_cycles. */
+
+ static void
+ cgraph_leafify_node (struct cgraph_node *node, htab_t cycles)
+ {
+ struct cgraph_edge *e;
+
+ for (e = node->callees; e; e = e->next_callee)
+ {
+ /* Inline call, if possible, and recurse. Be sure we are not
+ entering callgraph circles here. */
+ if (e->inline_failed
+ && e->callee->local.inlinable
+ && !cgraph_recursive_inlining_p (node, e->callee,
+ &e->inline_failed)
+ && !htab_find (cycles, e->callee))
+ {
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, " inlining %s", cgraph_node_name (e->callee));
+ cgraph_mark_inline_edge (e);
+ cgraph_leafify_node (e->callee, cycles);
+ }
+ else if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, " !inlining %s", cgraph_node_name (e->callee));
+ }
+ }
+
/* Decide on the inlining. We do so in the topological order to avoid
expenses on updating datastructures. */
*************** cgraph_decide_inlining (void)
*** 1317,1322 ****
--- 1378,1401 ----
struct cgraph_edge *e;
node = order[i];
+
+ /* Handle nodes to be leafified, but don't update overall unit size. */
+ if (lookup_attribute ("leafify", DECL_ATTRIBUTES (node->decl)) != NULL)
+ {
+ int old_overall_insns = overall_insns;
+ htab_t cycles;
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file,
+ "Leafifying %s\n", cgraph_node_name (node));
+ cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
+ cgraph_find_cycles (node, cycles);
+ cgraph_leafify_node (node, cycles);
+ htab_delete (cycles);
+ overall_insns = old_overall_insns;
+ /* We don't need to consider always_inline functions inside the leafified
+ function anymore. */
+ continue;
+ }
for (e = node->callees; e; e = e->next_callee)
if (e->callee->local.disregard_inline_limits)
Index: gcc/doc/extend.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/extend.texi,v
retrieving revision 1.82.2.34
diff -u -c -3 -p -r1.82.2.34 extend.texi
*** gcc/doc/extend.texi 30 Jan 2004 13:16:33 -0000 1.82.2.34
--- gcc/doc/extend.texi 5 Feb 2004 09:42:21 -0000
*************** The keyword @code{__attribute__} allows
*** 1893,1899 ****
attributes when making a declaration. This keyword is followed by an
attribute specification inside double parentheses. The following
attributes are currently defined for functions on all targets:
! @code{noreturn}, @code{noinline}, @code{always_inline},
@code{pure}, @code{const}, @code{nothrow},
@code{format}, @code{format_arg}, @code{no_instrument_function},
@code{section}, @code{constructor}, @code{destructor}, @code{used},
--- 1893,1899 ----
attributes when making a declaration. This keyword is followed by an
attribute specification inside double parentheses. The following
attributes are currently defined for functions on all targets:
! @code{noreturn}, @code{noinline}, @code{always_inline}, @code{leafify},
@code{pure}, @code{const}, @code{nothrow},
@code{format}, @code{format_arg}, @code{no_instrument_function},
@code{section}, @code{constructor}, @code{destructor}, @code{used},
*************** inlining.
*** 1969,1974 ****
--- 1969,1982 ----
Generally, functions are not inlined unless optimization is specified.
For functions declared inline, this attribute inlines the function even
if no optimization level was specified.
+
+ @cindex @code{leafify} function attribute
+ @item leafify
+ Generally, inlining into a function is limited. For a function marked with
+ this attribute, every call inside this function will be inlined, if possible.
+ Whether the function itself is considered for inlining depends on its size and
+ the current inlining parameters. The @code{leafify} attribute only works
+ reliably in unit-at-a-time mode.
@cindex @code{pure} function attribute
@item pure
leafify.c: check leafify functions don't contain call expressions
----------
static int foobar(int i);
static int bar(int i);
int __attribute__((leafify)) leaf0a(int i)
{
return bar(i);
}
int __attribute__((leafify)) leaf0b(int i)
{
return foobar(i);
}
int __attribute__((leafify)) leaf1(int i)
{
return bar(foobar(i));
}
int __attribute__((leafify)) leaf2(int i)
{
int j;
j = foobar(i);
return bar(j);
}
static int foobar(int i)
{
return i-1;
}
static int bar(int i)
{
return i + foobar(i);
}
static int g(int i)
{
return i*5+1;
}
static int f(int i)
{
return g(i);
}
int __attribute__((leafify)) leaf3(int i)
{
int j;
j = f(i);
j += f(i);
return j;
}
recurse.c: check the compilation terminates
----------
void __attribute__((leafify)) direct(void)
{
direct();
}
void __attribute__((leafify)) indirect(void);
static void indirect1(void)
{
indirect();
}
void __attribute__((leafify)) indirect(void)
{
indirect1();
}
void __attribute__((leafify)) doubleindirect(void);
static void doubleindirect2(void)
{
doubleindirect();
}
static void doubleindirect1(void)
{
doubleindirect2();
}
void __attribute__((leafify)) doubleindirect(void)
{
doubleindirect1();
}
static void subcycle1(void);
static void subcycle2(void)
{
subcycle1();
}
static void subcycle1(void)
{
subcycle2();
}
void __attribute__((leafify)) subcycle(void)
{
subcycle1();
}
static void doublesubcycle1(void);
static void doublesubcycle2(void);
static void doublesubcycle3(void)
{
doublesubcycle1();
}
static void doublesubcycle2(void)
{
doublesubcycle3();
}
static void doublesubcycle1(void)
{
doublesubcycle2();
}
void __attribute__((leafify)) doublesubcycle(void)
{
doublesubcycle1();
}