This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: [lno] 50% runtime performance regression since yesterday
On Wed, 23 Jun 2004, Sebastian Pop wrote:
> On Wed, Jun 23, 2004 at 03:45:45PM +0200, Sebastian Pop wrote:
> > On Wed, Jun 23, 2004 at 03:26:41PM +0200, Sebastian Pop wrote:
> > >
> > > Does the following patch fix the optimization regression?
> > >
> >
> > Not yet.
> >
>
> Or maybe yes, could you check please?
>
> There is no difference between the code generated by the compiler as
> of 2004-06-18 and the version after the patch. The command line for
> compiling the testcase was: objdir/gcc/cc1plus -O2 -funroll-loops tramp3d-v3.cpp
>
> $ diff -dup ./20040618/tramp3d-v3.s ./20040623/tramp3d-v3.s
> --- ./20040618/tramp3d-v3.s 2004-06-23 10:30:24.000000000 -0400
> +++ ./20040623/tramp3d-v3.s 2004-06-23 09:53:11.000000000 -0400
> @@ -1028,5 +1028,5 @@ _ZTI11CreateEvent:
> _ZTS11CreateEvent:
> .string "11CreateEvent"
> .globl memmove
> - .ident "GCC: (GNU) 3.5-tree-ssa-lno 20040618 (merged 20040612)"
> + .ident "GCC: (GNU) 3.5-tree-ssa-lno 20040623 (merged 20040612)"
> .section .note.GNU-stack,"",@progbits
Which may be due to the fact you don't use a leafify enabled compiler? (I
do, maybe I should have said so...) But I'm checking now anyways. For
reference, a leafify patch against lno is appended below.
Thanks,
Richard.
Index: c-common.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-common.c,v
retrieving revision 1.344.2.56.2.6
diff -u -u -r1.344.2.56.2.6 c-common.c
--- c-common.c 14 Jun 2004 01:57:40 -0000 1.344.2.56.2.6
+++ c-common.c 23 Jun 2004 14:51:40 -0000
@@ -751,6 +751,7 @@
static tree handle_noinline_attribute (tree *, tree, tree, int, bool *);
static tree handle_always_inline_attribute (tree *, tree, tree, int,
bool *);
+static tree handle_leafify_attribute (tree *, tree, tree, int, bool *);
static tree handle_used_attribute (tree *, tree, tree, int, bool *);
static tree handle_unused_attribute (tree *, tree, tree, int, bool *);
static tree handle_const_attribute (tree *, tree, tree, int, bool *);
@@ -812,6 +813,8 @@
handle_noinline_attribute },
{ "always_inline", 0, 0, true, false, false,
handle_always_inline_attribute },
+ { "leafify", 0, 0, true, false, false,
+ handle_leafify_attribute },
{ "used", 0, 0, true, false, false,
handle_used_attribute },
{ "unused", 0, 0, false, false, false,
@@ -4543,6 +4546,29 @@
return NULL_TREE;
}
+/* Handle a "leafify" attribute; arguments as in struct
+ attribute_spec.handler. Warns and sets *NO_ADD_ATTRS on non-functions. */
+
+static tree
+handle_leafify_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) == FUNCTION_DECL)
+ {
+ /* Nothing more to do for a function declaration: the attribute
+ is looked up later with lookup_attribute. */
+ }
+ else
+ {
+ warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+
/* Handle a "used" attribute; arguments as in
struct attribute_spec.handler. */
Index: cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.1.4.26.2.6
diff -u -u -r1.1.4.26.2.6 cgraphunit.c
--- cgraphunit.c 14 Jun 2004 01:57:47 -0000 1.1.4.26.2.6
+++ cgraphunit.c 23 Jun 2004 14:51:40 -0000
@@ -1439,6 +1439,67 @@
free (heap_node);
}
+/* Record in CYCLES every callgraph node that closes a circle
+ reachable from NODE, so later walks can avoid the circles.
+ Uses the cgraph nodes' ->aux field as an on-path marker; it
+ needs to be zero before and will be zero after operation. */
+
+static void
+cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
+{
+ struct cgraph_edge *e;
+
+ if (node->aux)
+ {
+ void **slot;
+ slot = htab_find_slot (cycles, node, INSERT);
+ if (!*slot)
+ {
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, "Cycle contains %s\n", cgraph_node_name (node));
+ *slot = node;
+ }
+ return;
+ }
+
+ node->aux = node;
+ for (e = node->callees; e; e = e->next_callee)
+ {
+ cgraph_find_cycles (e->callee, cycles);
+ }
+ node->aux = 0;
+}
+
+/* Leafify NODE: mark each eligible call edge inline and recurse
+ into its callee. To avoid running endlessly in circles of the
+ callgraph, callees found in CYCLES (the hashtab of cycle-closing
+ nodes filled by cgraph_find_cycles) are neither inlined nor followed. */
+
+static void
+cgraph_leafify_node (struct cgraph_node *node, htab_t cycles)
+{
+ struct cgraph_edge *e;
+
+ for (e = node->callees; e; e = e->next_callee)
+ {
+ /* Inline call, if possible, and recurse. Be sure we are not
+ entering callgraph circles here. */
+ if (e->inline_failed
+ && e->callee->local.inlinable
+ && !cgraph_recursive_inlining_p (node, e->callee,
+ &e->inline_failed)
+ && !htab_find (cycles, e->callee))
+ {
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, " inlining %s", cgraph_node_name (e->callee));
+ cgraph_mark_inline_edge (e);
+ cgraph_leafify_node (e->callee, cycles);
+ }
+ else if (cgraph_dump_file)
+ fprintf (cgraph_dump_file, " !inlining %s", cgraph_node_name (e->callee));
+ }
+}
+
/* Decide on the inlining. We do so in the topological order to avoid
expenses on updating data structures. */
@@ -1477,6 +1538,24 @@
node = order[i];
+ /* Handle nodes to be leafified, but don't update overall unit size. */
+ if (lookup_attribute ("leafify", DECL_ATTRIBUTES (node->decl)) != NULL)
+ {
+ int old_overall_insns = overall_insns;
+ htab_t cycles;
+ if (cgraph_dump_file)
+ fprintf (cgraph_dump_file,
+ "Leafifying %s\n", cgraph_node_name (node));
+ cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
+ cgraph_find_cycles (node, cycles);
+ cgraph_leafify_node (node, cycles);
+ htab_delete (cycles);
+ overall_insns = old_overall_insns;
+ /* We don't need to consider always_inline functions inside the leafified
+ function anymore. */
+ continue;
+ }
+
if (!node->local.disregard_inline_limits)
continue;
if (cgraph_dump_file)