Example program takes 2000 times as long to compile under C++ as C
Zack Weinberg
zack@wolery.cumb.org
Tue Sep 5 13:36:00 GMT 2000
On Tue, Sep 05, 2000 at 12:38:15AM -0700, Mark Mitchell wrote:
>
> Zack --
>
> I couldn't do better than you. The SAVE_EXPRs are coming from
> constant folding: we are turning
>
> (a ? b : c) | x
>
> into:
>
> a ? (b | x) : (c | x)
>
> This blows up the size of the expression (if the folding doesn't
> actually fold anything).
Aha. I agree, it doesn't seem like a useful transformation in this
case.
> However, it is never-the-less a good thing to avoid walking the same
> tree multiple times, where possible. (It's definitely not safe to
> always do this.)
>
> Here's the patch I came up with, which does cut the compile time
> down to almost zero for your test case.
This looks an awful lot like what I was playing with, only tidier.
I have an incremental patch on top of this, which adds another tree
walker that only examines *_STMT nodes. It cuts out even more overhead
in the contexts where it can be used, such as prune_unused_decls.
It could theoretically be used for verify_stmt_tree too, but that's
not used except for debugging so I left it alone.
The patch also contains an additional tweak to prune_unused_decls. If
a BLOCK node ends up with no variables after unused ones are
discarded, it can be zapped entirely. The SCOPE_STMT it's attached to
then becomes nullified. We can't get rid of the SCOPE_STMT itself,
because then expand_{start,end}_bindings won't get called, but they do
a fair bit less work if SCOPE_NULLIFIED_P is true.
Also it appears to be unnecessary for walk_tree to dink with the
current line number, but I may be missing something. The comment says
it's done for instantiate_decl and inlinable_function_p, but as far as
I can tell neither of them uses walk_tree (even transitively) and it's
cleaner for it not to do that.
This passes the C++ test suite with no regressions. I'm running a
bootstrap now.
[N.B. There are a couple places where I wrote "This can be tail
recursion optimized..." - we don't do it, though. :(]
zw
cp:
* tree.c (walk_tree): Do not modify the current line
number. Expose tail recursion.
(walk_stmt_tree): New function.
* cp-tree.h: Prototype walk_stmt_tree.
* semantics.c (prune_unused_decls): Operate on SCOPE_STMTs not
the BLOCKs directly. If a BLOCK has no variables after
pruning, discard it.
(finish_stmt_tree): Use walk_stmt_tree. No need to save and
restore the line number.
===================================================================
Index: cp/cp-tree.h
--- cp/cp-tree.h 2000/09/05 07:31:26 1.521
+++ cp/cp-tree.h 2000/09/05 20:26:40
@@ -4543,6 +4543,9 @@ extern tree walk_tree
extern tree walk_tree_without_duplicates PARAMS ((tree *,
walk_tree_fn,
void *));
+extern tree walk_stmt_tree PARAMS ((tree *,
+ walk_tree_fn,
+ void *));
extern tree copy_tree_r PARAMS ((tree *, int *, void *));
extern int cp_valid_lang_attribute PARAMS ((tree, tree, tree, tree));
extern tree make_ptrmem_cst PARAMS ((tree, tree));
===================================================================
Index: cp/semantics.c
--- cp/semantics.c 2000/09/05 07:31:27 1.166
+++ cp/semantics.c 2000/09/05 20:26:40
@@ -2274,21 +2274,28 @@ prune_unused_decls (tp, walk_subtrees, d
return prune_unused_decls (tp, walk_subtrees, data);
}
}
- else if (TREE_CODE (t) == BLOCK)
+ else if (TREE_CODE (t) == SCOPE_STMT)
{
- /* walk_tree doesn't inspect BLOCK_VARS, so we must do it by hand. */
- tree *vp;
+ /* Remove all unused decls from the BLOCK of this SCOPE_STMT. */
+ tree block = SCOPE_STMT_BLOCK (t);
- for (vp = &BLOCK_VARS (t); *vp; )
+ if (block)
{
- tree v = *vp;
- if (! TREE_USED (v) && DECL_NAME (v) && DECL_SOURCE_LINE (v) == 0)
- *vp = TREE_CHAIN (v); /* drop */
- else
- vp = &TREE_CHAIN (v); /* advance */
+ tree *vp;
+
+ for (vp = &BLOCK_VARS (block); *vp; )
+ {
+ tree v = *vp;
+ if (! TREE_USED (v) && DECL_NAME (v) && DECL_SOURCE_LINE (v) == 0)
+ *vp = TREE_CHAIN (v); /* drop */
+ else
+ vp = &TREE_CHAIN (v); /* advance */
+ }
+ /* If there are now no variables, the entire BLOCK can be dropped.
+ (This causes SCOPE_NULLIFIED_P (t) to be true.) */
+ if (BLOCK_VARS (block) == NULL_TREE)
+ SCOPE_STMT_BLOCK (t) = NULL_TREE;
}
- if (BLOCK_VARS (t) == NULL_TREE)
- TREE_USED (t) = 0;
}
return NULL_TREE;
}
@@ -2314,18 +2321,14 @@ finish_stmt_tree (t)
tree *t;
{
tree stmt;
- int old_lineno;
/* Remove the fake extra statement added in begin_stmt_tree. */
stmt = TREE_CHAIN (*t);
*t = stmt;
SET_LAST_STMT (NULL_TREE);
- /* Remove unused decls from the stmt tree. walk_tree messes with
- the line number, so save/restore it. */
- old_lineno = lineno;
- walk_tree_without_duplicates (t, prune_unused_decls, NULL);
- lineno = old_lineno;
+ /* Remove unused decls from the stmt tree. */
+ walk_stmt_tree (t, prune_unused_decls, NULL);
if (cfun)
{
===================================================================
Index: cp/tree.c
--- cp/tree.c 2000/09/05 07:31:27 1.213
+++ cp/tree.c 2000/09/05 20:26:41
@@ -1284,11 +1284,6 @@ walk_tree (tp, func, data, htab)
{
int i, len;
- /* Set lineno here so we get the right instantiation context
- if we call instantiate_decl from inlinable_function_p. */
- if (statement_code_p (code) && !STMT_LINENO_FOR_FN_P (*tp))
- lineno = STMT_LINENO (*tp);
-
/* Walk over all the sub-trees of this operand. */
len = first_rtl_op (code);
/* TARGET_EXPRs are peculiar: operands 1 and 3 can be the same.
@@ -1319,7 +1314,8 @@ walk_tree (tp, func, data, htab)
WALK_SUBTREE (DECL_SIZE_UNIT (DECL_STMT_DECL (*tp)));
}
- WALK_SUBTREE (TREE_CHAIN (*tp));
+ /* This can be tail-recursion optimized if we write it this way. */
+ return walk_tree (&TREE_CHAIN (*tp), func, data, htab);
}
/* We didn't find what we were looking for. */
@@ -1452,6 +1448,73 @@ walk_tree_without_duplicates (tp, func,
result = walk_tree (tp, func, data, htab);
htab_delete (htab);
return result;
+}
+
+/* Like walk_tree, but only examines statement nodes. We don't need a
+ without_duplicates variant of this one because the statement tree is
+ a tree, not a graph. */
+
+tree
+walk_stmt_tree (tp, func, data)
+ tree *tp;
+ walk_tree_fn func;
+ void *data;
+{
+ enum tree_code code;
+ int walk_subtrees;
+ tree result;
+ int i, len;
+
+#define WALK_SUBTREE(NODE) \
+ do \
+ { \
+ result = walk_stmt_tree (&(NODE), func, data); \
+ if (result) \
+ return result; \
+ } \
+ while (0)
+
+ /* Skip empty subtrees. */
+ if (!*tp)
+ return NULL_TREE;
+
+ /* Skip subtrees below non-statement nodes. */
+ if (!statement_code_p (TREE_CODE (*tp)))
+ return NULL_TREE;
+
+ /* Call the function. */
+ walk_subtrees = 1;
+ result = (*func) (tp, &walk_subtrees, data);
+
+ /* If we found something, return it. */
+ if (result)
+ return result;
+
+ /* Even if we didn't, FUNC may have decided that there was nothing
+ interesting below this point in the tree. */
+ if (!walk_subtrees)
+ return NULL_TREE;
+
+ /* FUNC may have modified the tree, recheck that we're looking at a
+ statement node. */
+ code = TREE_CODE (*tp);
+ if (!statement_code_p (code))
+ return NULL_TREE;
+
+ /* Walk over all the sub-trees of this operand. Statement nodes never
+ contain RTL, and we needn't worry about TARGET_EXPRs. */
+ len = TREE_CODE_LENGTH (code);
+
+ /* Go through the subtrees. We need to do this in forward order so
+ that the scope of a FOR_EXPR is handled properly. */
+ for (i = 0; i < len; ++i)
+ WALK_SUBTREE (TREE_OPERAND (*tp, i));
+
+ /* Finally visit the chain. This can be tail-recursion optimized if
+ we write it this way. */
+ return walk_stmt_tree (&TREE_CHAIN (*tp), func, data);
+
+#undef WALK_SUBTREE
}
/* Called from count_trees via walk_tree. */
More information about the Gcc-bugs
mailing list