This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Add leafify function attribute



This patch adds the __attribute__((leafify)) function attribute, which was much discussed last year (in conjunction with inlining in general). For expression-template code like POOMA, it yields a performance improvement of about 30% on ia32 for my applications; on ia64 the gain is more like 50%. The compile-time impact on leafified functions is non-negligible (it goes up to 150% for my applications) and can be attributed to the loop optimization passes.

Bootstrapped and regtested (including the two new tests) on
i686-pc-linux-gnu for c and c++.

The patch (or previous forms of it) has also been in use in my local
trees for nearly two years.  tree-ssa currently miscompiles my performance
testcase with leafify enabled, but 3.5 is fine.

Ok? (I don't have CVS write access)

Richard.


2004-02-05 Richard Guenther <richard.guenther@uni-tuebingen.de>


         * c-common.c (handle_leafify_attribute): New.
         (struct c_common_attributes): Add leafify.
         * cgraphunit.c (cgraph_find_cycles): New.
         (cgraph_leafify_node): New.
         (cgraph_decide_inlining): Use them to handle leafify
         attribute.
         * doc/extend.texi: Document leafify function attribute.
         * testsuite/gcc.dg/attr-leafify-1.c: New test.
         * testsuite/gcc.dg/attr-leafify-2.c: New test.
? gcc/testsuite/gcc.dg/attr-leafify-1.c
? gcc/testsuite/gcc.dg/attr-leafify-2.c
Index: gcc/c-common.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/c-common.c,v
retrieving revision 1.496
diff -u -c -3 -p -r1.496 c-common.c
*** gcc/c-common.c	23 Apr 2004 22:50:07 -0000	1.496
--- gcc/c-common.c	2 May 2004 19:53:41 -0000
*************** static tree handle_noreturn_attribute (t
*** 740,745 ****
--- 740,746 ----
  static tree handle_noinline_attribute (tree *, tree, tree, int, bool *);
  static tree handle_always_inline_attribute (tree *, tree, tree, int,
  					    bool *);
+ static tree handle_leafify_attribute (tree *, tree, tree, int, bool *);
  static tree handle_used_attribute (tree *, tree, tree, int, bool *);
  static tree handle_unused_attribute (tree *, tree, tree, int, bool *);
  static tree handle_const_attribute (tree *, tree, tree, int, bool *);
*************** const struct attribute_spec c_common_att
*** 801,806 ****
--- 802,809 ----
  			      handle_noinline_attribute },
    { "always_inline",          0, 0, true,  false, false,
  			      handle_always_inline_attribute },
+   { "leafify",                0, 0, true,  false, false,
+                               handle_leafify_attribute },
    { "used",                   0, 0, true,  false, false,
  			      handle_used_attribute },
    { "unused",                 0, 0, false, false, false,
*************** handle_always_inline_attribute (tree *no
*** 4373,4378 ****
--- 4376,4404 ----
  
    return NULL_TREE;
  }
+ 
+ /* Handle a "leafify" attribute; arguments as in
+    struct attribute_spec.handler.  */
+ 
+ static tree
+ handle_leafify_attribute (tree *node, tree name,
+                           tree args ATTRIBUTE_UNUSED,
+                           int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+ {
+   if (TREE_CODE (*node) == FUNCTION_DECL)
+     {
+       /* Do nothing else, just set the attribute.  We'll get at
+          it later with lookup_attribute.  */
+     }
+   else
+     {
+       warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
+       *no_add_attrs = true;
+     }
+ 
+   return NULL_TREE;
+ }
+ 
  
  /* Handle a "used" attribute; arguments as in
     struct attribute_spec.handler.  */
Index: gcc/cgraphunit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cgraphunit.c,v
retrieving revision 1.58
diff -u -c -3 -p -r1.58 cgraphunit.c
*** gcc/cgraphunit.c	30 Apr 2004 16:27:16 -0000	1.58
--- gcc/cgraphunit.c	2 May 2004 19:53:44 -0000
*************** cgraph_decide_inlining_of_small_function
*** 1347,1352 ****
--- 1347,1413 ----
    free (heap_node);
  }
  
+ /* Find callgraph nodes closing a circle in the graph.  The
+    resulting hashtab can be used to avoid walking the circles.
+    Uses the cgraph nodes ->aux field which needs to be zero
+    before and will be zero after operation.  */
+ 
+ static void
+ cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
+ {
+   struct cgraph_edge *e;
+ 
+   if (node->aux)
+     {
+       void **slot;
+       slot = htab_find_slot (cycles, node, INSERT);
+       if (!*slot)
+ 	{
+ 	  if (cgraph_dump_file)
+ 	    fprintf (cgraph_dump_file, "Cycle contains %s\n", cgraph_node_name (node));
+ 	  *slot = node;
+ 	}
+       return;
+     }
+ 
+   node->aux = node;
+   for (e = node->callees; e; e = e->next_callee)
+     {
+        cgraph_find_cycles (e->callee, cycles); 
+     }
+   node->aux = 0;
+ }
+ 
+ /* Leafify the cgraph node.  We have to be careful in recursing
+    as to not run endlessly in circles of the callgraph.
+    We do so by using a hashtab of cycle entering nodes as generated
+    by cgraph_find_cycles.  */
+ 
+ static void
+ cgraph_leafify_node (struct cgraph_node *node, htab_t cycles)
+ {
+   struct cgraph_edge *e;
+ 
+   for (e = node->callees; e; e = e->next_callee)
+     {
+       /* Inline call, if possible, and recurse.  Be sure we are not
+ 	 entering callgraph circles here.  */
+       if (e->inline_failed
+ 	  && e->callee->local.inlinable
+ 	  && !cgraph_recursive_inlining_p (node, e->callee,
+ 				  	   &e->inline_failed)
+ 	  && !htab_find (cycles, e->callee))
+ 	{
+ 	  if (cgraph_dump_file)
+     	    fprintf (cgraph_dump_file, " inlining %s", cgraph_node_name (e->callee));
+           cgraph_mark_inline_edge (e);
+ 	  cgraph_leafify_node (e->callee, cycles);
+ 	}
+       else if (cgraph_dump_file)
+ 	fprintf (cgraph_dump_file, " !inlining %s", cgraph_node_name (e->callee));
+     }
+ }
+ 
  /* Decide on the inlining.  We do so in the topological order to avoid
     expenses on updating data structures.  */
  
*************** cgraph_decide_inlining (void)
*** 1384,1389 ****
--- 1445,1468 ----
        struct cgraph_edge *e;
  
        node = order[i];
+ 
+       /* Handle nodes to be leafified, but don't update overall unit size.  */
+       if (lookup_attribute ("leafify", DECL_ATTRIBUTES (node->decl)) != NULL)
+         {
+ 	  int old_overall_insns = overall_insns;
+ 	  htab_t cycles;
+   	  if (cgraph_dump_file)
+     	    fprintf (cgraph_dump_file,
+ 	     	     "Leafifying %s\n", cgraph_node_name (node));
+ 	  cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
+ 	  cgraph_find_cycles (node, cycles);
+ 	  cgraph_leafify_node (node, cycles);
+ 	  htab_delete (cycles);
+ 	  overall_insns = old_overall_insns;
+ 	  /* We don't need to consider always_inline functions inside the leafified
+ 	     function anymore.  */
+ 	  continue;
+         }
  
        for (e = node->callees; e; e = e->next_callee)
  	if (e->callee->local.disregard_inline_limits)
Index: gcc/doc/extend.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/extend.texi,v
retrieving revision 1.192
diff -u -c -3 -p -r1.192 extend.texi
*** gcc/doc/extend.texi	18 Apr 2004 22:01:15 -0000	1.192
--- gcc/doc/extend.texi	2 May 2004 19:54:14 -0000
*************** The keyword @code{__attribute__} allows 
*** 1893,1899 ****
  attributes when making a declaration.  This keyword is followed by an
  attribute specification inside double parentheses.  The following
  attributes are currently defined for functions on all targets:
! @code{noreturn}, @code{noinline}, @code{always_inline},
  @code{pure}, @code{const}, @code{nothrow},
  @code{format}, @code{format_arg}, @code{no_instrument_function},
  @code{section}, @code{constructor}, @code{destructor}, @code{used},
--- 1893,1899 ----
  attributes when making a declaration.  This keyword is followed by an
  attribute specification inside double parentheses.  The following
  attributes are currently defined for functions on all targets:
! @code{noreturn}, @code{noinline}, @code{always_inline}, @code{leafify},
  @code{pure}, @code{const}, @code{nothrow},
  @code{format}, @code{format_arg}, @code{no_instrument_function},
  @code{section}, @code{constructor}, @code{destructor}, @code{used},
*************** inlining.
*** 1969,1974 ****
--- 1969,1982 ----
  Generally, functions are not inlined unless optimization is specified.
  For functions declared inline, this attribute inlines the function even
  if no optimization level was specified.
+ 
+ @cindex @code{leafify} function attribute
+ @item leafify
+ Generally, inlining into a function is limited.  For a function marked with
+ this attribute, every call inside this function will be inlined, if possible.
+ Whether the function itself is considered for inlining depends on its size and
+ the current inlining parameters.  The @code{leafify} attribute only works
+ reliably in unit-at-a-time mode.
  
  @cindex @code{pure} function attribute
  @item pure

/* Test leafify attribute, which forces inlining of all functions
   down the callgraph in unit-at-a-time mode.  */
/* Origin: Richard Guenther <richard.guenther@uni-tuebingen.de>.  */
/* { dg-do compile } */
/* { dg-options "-funit-at-a-time -finline" } */

/* Helper callees for the leafified functions below.  Both are static,
   and the dg-final directives at the end of this file check that
   neither name appears in the generated assembly.  */
static int foobar(int i);
static int bar(int i);
/* Innermost callee: makes no further calls.  */
static int foobar(int i)
{
  return i-1;
}
/* Intermediate callee: calls foobar, so a caller of bar has a
   two-level call chain beneath it.  */
static int bar(int i)
{
  return i + foobar(i);
}


/* Leafified function with a single call to bar, which itself calls
   foobar (call chain of depth two).  */
int __attribute__((leafify)) leaf0a(int i)
{
  return bar(i);
}
/* Leafified function with a single call to foobar, which makes no
   further calls (call chain of depth one).  */
int __attribute__((leafify)) leaf0b(int i)
{
  return foobar(i);
}
/* Leafified function with nested calls: the result of foobar is passed
   directly as the argument of bar.  */
int __attribute__((leafify)) leaf1(int i)
{
  return bar(foobar(i));
}
/* Same calls as leaf1, but written as two statements with the
   intermediate result held in a local variable.  */
int __attribute__((leafify)) leaf2(int i)
{
  int j;
  j = foobar(i);
  return bar(j);
}
/* Leafified function that calls the same callee (bar) twice and sums
   the two results.  */
int __attribute__((leafify)) leaf3(int i)
{
  int j;
  j = bar(i);
  j += bar(i);
  return j;
}

/* { dg-final { scan-assembler-not "foobar" } } */
/* { dg-final { scan-assembler-not "bar" } } */

/* Test leafify attribute corner cases where we need to avoid inlining.  */
/* Origin: Richard Guenther <richard.guenther@uni-tuebingen.de>.  */
/* { dg-do compile } */
/* { dg-options "-funit-at-a-time -finline" } */

/* Direct recursion: the leafified function calls itself.  */
void __attribute__((leafify)) direct(void)
{
  direct();
}

/* Indirect recursion through one intermediate function:
   indirect -> indirect1 -> indirect.  The leafified function is
   itself part of the cycle.  */
void __attribute__((leafify)) indirect(void);
static void indirect1(void)
{
  indirect();
}
void __attribute__((leafify)) indirect(void)
{
  indirect1();
}

/* Indirect recursion through two intermediate functions:
   doubleindirect -> doubleindirect1 -> doubleindirect2 -> doubleindirect.
   The leafified function is itself part of the cycle.  */
void __attribute__((leafify)) doubleindirect(void);
static void doubleindirect2(void)
{
  doubleindirect();
}
static void doubleindirect1(void)
{
  doubleindirect2();
}
void __attribute__((leafify)) doubleindirect(void)
{
  doubleindirect1();
}

/* Cycle below the leafified function: subcycle1 and subcycle2 call
   each other, and the leafified function subcycle only enters that
   cycle via subcycle1 without being a member of it.  */
static void subcycle1(void);
static void subcycle2(void)
{
  subcycle1();
}
static void subcycle1(void)
{
  subcycle2();
}
void __attribute__((leafify)) subcycle(void)
{
  subcycle1();
}

/* Three-function cycle below the leafified function:
   doublesubcycle1 -> doublesubcycle2 -> doublesubcycle3 -> doublesubcycle1.
   The leafified function doublesubcycle enters the cycle via
   doublesubcycle1 without being a member of it.  */
static void doublesubcycle1(void);
static void doublesubcycle2(void);
static void doublesubcycle3(void)
{
  doublesubcycle1();
}
static void doublesubcycle2(void)
{
  doublesubcycle3();
}
static void doublesubcycle1(void)
{
  doublesubcycle2();
}
void __attribute__((leafify)) doublesubcycle(void)
{
  doublesubcycle1();
}



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]