This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
C++: enum parsing speedups

From: Zack Weinberg <zack at codesourcery dot com>
To: gcc-patches at gcc dot gnu dot org, Mark Mitchell <mark at codesourcery dot com>
Date: Mon, 30 Aug 2004 21:24:06 -0700
Subject: C++: enum parsing speedups
This patch speeds up a couple of paths in the C++ parser's handling of
'enum' declarations/definitions.  These have fairly restricted syntax,
so it is possible to replace the backtracking that was being done with
a two-token lookahead.  More significantly, build_enumerator was
constructing a PLUS_EXPR just to fold it out again.  It's much faster
to call add_double and int_fits_type_p directly.

The combination of these changes speeds up a contrived benchmark 
(a very large, machine-generated file consisting entirely of enum
definitions and declarations using elaborated-type-specifiers headed
by 'enum', compiled with -fsyntax-only) by 15%.  I expect more like 1%
on real code -- headers frequently do contain very large enums, after all.

Mark: I'd like you to check my work, because in order to get the
correct locations assigned for enum TYPE_DECLs (where 'correct' is
defined by g++.old-deja/g++.other/enum2.C) I had to add calls to
cp_lexer_set_source_position_from_token to both
cp_lexer_peek_nth_token and cp_lexer_consume_token.  It seems to me
that this is consistent with the intent of the parser generally, but
I'm not sure.  (It is lame that we need this at all, but that's the
way the world is.)

I took the opportunity to replace a few calls to cp_lexer_peek_token
with cp_lexer_next_token_is/is_not.

Bootstrapped amd64-linux, C/C++ only, no regressions.  Won't be
checked in just yet.

zw

cp:
        * decl.c (build_enumerator): Call add_double and int_fits_type_p
        directly, to avoid creating short-lived trees.
        * parser.c (cp_lexer_peek_nth_token, cp_lexer_consume_token):
        Call cp_lexer_set_source_position_from_token.
        (cp_parser_type_specifier <RID_ENUM>): Use two-token lookahead
        instead of backtracking.  Move some code to avoid a conditional.
        (cp_parser_enum_specifier): Rely on caller's parsing a little
        to avoid duplicated effort.  Use cp_lexer_next_token_is_not.
        (cp_parser_enumerator_list, cp_parser_enumerator_definition):
        Use cp_lexer_next_token_is/cp_lexer_next_token_is_not where
        appropriate.

===================================================================
Index: cp/decl.c
--- cp/decl.c	30 Aug 2004 15:28:39 -0000	1.1284
+++ cp/decl.c	31 Aug 2004 04:19:11 -0000
@@ -9504,18 +9504,25 @@ build_enumerator (tree name, tree value,
       /* Default based on previous value.  */
       if (value == NULL_TREE)
 	{
-	  tree prev_value;
-
 	  if (TYPE_VALUES (enumtype))
 	    {
-	      /* The next value is the previous value ...  */
+	      HOST_WIDE_INT hi;
+	      unsigned HOST_WIDE_INT lo;
+	      tree prev_value;
+	      bool overflowed;
+
+	      /* The next value is the previous value plus one.  We can
+	         safely assume that the previous value is an INTEGER_CST.
+		 add_double doesn't know the type of the target expression,
+		 so we must check with int_fits_type_p as well.  */
 	      prev_value = DECL_INITIAL (TREE_VALUE (TYPE_VALUES (enumtype)));
-	      /* ... plus one.  */
-	      value = cp_build_binary_op (PLUS_EXPR,
-					  prev_value,
-					  integer_one_node);
+	      overflowed = add_double (TREE_INT_CST_LOW (prev_value),
+				       TREE_INT_CST_HIGH (prev_value),
+				       1, 0, &lo, &hi);
+	      value = build_int_cst_wide (TREE_TYPE (prev_value), lo, hi);
+	      overflowed |= !int_fits_type_p (value, TREE_TYPE (prev_value));
 
-	      if (tree_int_cst_lt (value, prev_value))
+	      if (overflowed)
 		error ("overflow in enumeration values at `%D'", name);
 	    }
 	  else
===================================================================
Index: cp/parser.c
--- cp/parser.c	30 Aug 2004 16:03:45 -0000	1.241
+++ cp/parser.c	31 Aug 2004 04:19:11 -0000
@@ -735,6 +735,7 @@ cp_lexer_peek_nth_token (cp_lexer* lexer
 	token = cp_lexer_read_token (lexer);
     }
 
+  cp_lexer_set_source_position_from_token (lexer, token);
   return token;
 }
 
@@ -780,6 +781,7 @@ cp_lexer_consume_token (cp_lexer* lexer)
       fprintf (cp_lexer_debug_stream, "\n");
     }
 
+  cp_lexer_set_source_position_from_token (lexer, token);
   return token;
 }
 
@@ -9248,21 +9250,36 @@ cp_parser_type_specifier (cp_parser* par
   keyword = token->keyword;
   switch (keyword)
     {
+    case RID_ENUM:
+      /* 'enum' [identifier] '{' introduces an enum-specifier;
+	 'enum' <anything else> introduces an elaborated-type-specifier.  */
+      if (cp_lexer_peek_nth_token (parser->lexer, 2)->type == CPP_OPEN_BRACE
+	  || (cp_lexer_peek_nth_token (parser->lexer, 2)->type == CPP_NAME
+	      && cp_lexer_peek_nth_token (parser->lexer, 3)->type
+	         == CPP_OPEN_BRACE))
+	{
+	  type_spec = cp_parser_enum_specifier (parser);
+	  if (declares_class_or_enum)
+	    *declares_class_or_enum = 2;
+	  if (decl_specs)
+	    cp_parser_set_decl_spec_type (decl_specs,
+					  type_spec,
+					  /*user_defined_p=*/true);
+	  return type_spec;
+	}
+      else
+	goto elaborated_type_specifier;
+
       /* Any of these indicate either a class-specifier, or an
 	 elaborated-type-specifier.  */
     case RID_CLASS:
     case RID_STRUCT:
     case RID_UNION:
-    case RID_ENUM:
       /* Parse tentatively so that we can back up if we don't find a
-	 class-specifier or enum-specifier.  */
+	 class-specifier.  */
       cp_parser_parse_tentatively (parser);
-      /* Look for the class-specifier or enum-specifier.  */
-      if (keyword == RID_ENUM)
-	type_spec = cp_parser_enum_specifier (parser);
-      else
-	type_spec = cp_parser_class_specifier (parser);
-
+      /* Look for the class-specifier.  */
+      type_spec = cp_parser_class_specifier (parser);
       /* If that worked, we're done.  */
       if (cp_parser_parse_definitely (parser))
 	{
@@ -9276,7 +9293,12 @@ cp_parser_type_specifier (cp_parser* par
 	}
 
       /* Fall through.  */
+    elaborated_type_specifier:
+      /* We're declaring (not defining) a class or enum.  */
+      if (declares_class_or_enum)
+	*declares_class_or_enum = 1;
 
+      /* Fall through.  */
     case RID_TYPENAME:
       /* Look for an elaborated-type-specifier.  */
       type_spec
@@ -9284,10 +9306,6 @@ cp_parser_type_specifier (cp_parser* par
 	   (parser,
 	    decl_specs && decl_specs->specs[(int) ds_friend],
 	    is_declaration));
-      /* We're declaring a class or enum -- unless we're using
-	 `typename'.  */
-      if (declares_class_or_enum && keyword != RID_TYPENAME)
-	*declares_class_or_enum = 1;
       if (decl_specs)
 	cp_parser_set_decl_spec_type (decl_specs,
 				      type_spec,
@@ -9889,40 +9907,33 @@ cp_parser_elaborated_type_specifier (cp_
 static tree
 cp_parser_enum_specifier (cp_parser* parser)
 {
-  cp_token *token;
-  tree identifier = NULL_TREE;
+  tree identifier;
   tree type;
 
-  /* Look for the `enum' keyword.  */
-  if (!cp_parser_require_keyword (parser, RID_ENUM, "`enum'"))
-    return error_mark_node;
-  /* Peek at the next token.  */
-  token = cp_lexer_peek_token (parser->lexer);
+  /* Caller guarantees that the current token is 'enum', an identifier
+     possibly follows, and the token after that is an opening brace.
+     If we don't have an identifier, fabricate an anonymous name for
+     the enumeration being defined.  */
+  cp_lexer_consume_token (parser->lexer);
 
-  /* See if it is an identifier.  */
-  if (token->type == CPP_NAME)
+  if (cp_lexer_next_token_is (parser->lexer, CPP_NAME))
     identifier = cp_parser_identifier (parser);
+  else
+    identifier = make_anon_name ();
 
-  /* Look for the `{'.  */
-  if (!cp_parser_require (parser, CPP_OPEN_BRACE, "`{'"))
-    return error_mark_node;
-
-  /* At this point, we're going ahead with the enum-specifier, even
-     if some other problem occurs.  */
-  cp_parser_commit_to_tentative_parse (parser);
+  cp_lexer_consume_token (parser->lexer);
 
   /* Issue an error message if type-definitions are forbidden here.  */
   cp_parser_check_type_definition (parser);
 
   /* Create the new type.  */
-  type = start_enum (identifier ? identifier : make_anon_name ());
+  type = start_enum (identifier);
 
-  /* Peek at the next token.  */
-  token = cp_lexer_peek_token (parser->lexer);
-  /* If it's not a `}', then there are some enumerators.  */
-  if (token->type != CPP_CLOSE_BRACE)
+  /* If the next token is not '}', then there are some enumerators.  */
+  if (cp_lexer_next_token_is_not (parser->lexer, CPP_CLOSE_BRACE))
     cp_parser_enumerator_list (parser, type);
-  /* Look for the `}'.  */
+
+  /* Consume the final '}'.  */
   cp_parser_require (parser, CPP_CLOSE_BRACE, "`}'");
 
   /* Finish up the enumeration.  */
@@ -9943,15 +9954,12 @@ cp_parser_enumerator_list (cp_parser* pa
 {
   while (true)
     {
-      cp_token *token;
-
       /* Parse an enumerator-definition.  */
       cp_parser_enumerator_definition (parser, type);
-      /* Peek at the next token.  */
-      token = cp_lexer_peek_token (parser->lexer);
-      /* If it's not a `,', then we've reached the end of the
-	 list.  */
-      if (token->type != CPP_COMMA)
+
+      /* If the next token is not a ',', we've reached the end of
+	 the list.  */
+      if (cp_lexer_next_token_is_not (parser->lexer, CPP_COMMA))
 	break;
       /* Otherwise, consume the `,' and keep going.  */
       cp_lexer_consume_token (parser->lexer);
@@ -9978,7 +9986,6 @@ cp_parser_enumerator_list (cp_parser* pa
 static void
 cp_parser_enumerator_definition (cp_parser* parser, tree type)
 {
-  cp_token *token;
   tree identifier;
   tree value;
 
@@ -9987,10 +9994,8 @@ cp_parser_enumerator_definition (cp_pars
   if (identifier == error_mark_node)
     return;
 
-  /* Peek at the next token.  */
-  token = cp_lexer_peek_token (parser->lexer);
-  /* If it's an `=', then there's an explicit value.  */
-  if (token->type == CPP_EQ)
+  /* If the next token is an '=', then there is an explicit value.  */
+  if (cp_lexer_next_token_is (parser->lexer, CPP_EQ))
     {
       /* Consume the `=' token.  */
       cp_lexer_consume_token (parser->lexer);
Follow-Ups:
- Re: C++: enum parsing speedups
  - From: Mark Mitchell
- Re: C++: enum parsing speedups
  - From: Andrew Pinski
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]