C++ ping: reduce size of cp_token

Tom Tromey tromey@redhat.com
Thu Mar 20 19:58:00 GMT 2008


>>>>> "Jakub" == Jakub Jelinek <jakub@redhat.com> writes:

Jakub> What size increase does it cause on parser.o's .text section?

In the end I didn't look at this; I just rewrote the patch following
your suggestion, since I agreed with your logic.

Instead of re-purposing flag bits in the 'flags' field, I just turned
the needed flags into boolean bitfields in cp_token, since that is
clearer (IMO).  It also makes it explicit that we have 4 unused
bits.

Bootstrapped and regression tested on the compile farm.  Ok?

Tom

gcc/cp/ChangeLog:
2008-03-20  Tom Tromey  <tromey@redhat.com>

	* parser.c (struct cp_token) <flags>: Remove.
	<flag_prev_white, flag_digraph, flag_pure_zero>: New fields.
	<location>: Move earlier.
	(eof_token): Update.
	(cp_lexer_get_preprocessor_token): Set flag fields.
	(check_empty_body): Update.
	(cp_parser_template_id): Likewise.
	(cp_parser_pure_specifier): Likewise.
	(cp_parser_nth_token_starts_template_argument_list_p): Likewise.

Index: gcc/cp/parser.c
===================================================================
--- gcc/cp/parser.c	(revision 133353)
+++ gcc/cp/parser.c	(working copy)
@@ -58,7 +58,8 @@
   tree qualifying_scope;
 };
 
-/* A C++ token.  */
+/* A C++ token.  Note that this is carefully laid out to have the
+   least amount of padding.  */
 
 typedef struct cp_token GTY (())
 {
@@ -67,8 +68,12 @@
   /* If this token is a keyword, this value indicates which keyword.
      Otherwise, this value is RID_MAX.  */
   ENUM_BITFIELD (rid) keyword : 8;
-  /* Token flags.  */
-  unsigned char flags;
+  /* Token flag: if there was whitespace before the token.  */
+  BOOL_BITFIELD flag_prev_white : 1;
+  /* Token flag: if the token was a digraph.  */
+  BOOL_BITFIELD flag_digraph : 1;
+  /* Token flag: if the token was a single 0 digit.  */
+  BOOL_BITFIELD flag_pure_zero : 1;
   /* Identifier for the pragma.  */
   ENUM_BITFIELD (pragma_kind) pragma_kind : 6;
   /* True if this token is from a system header.  */
@@ -79,6 +84,9 @@
      KEYWORD is RID_MAX) iff this name was looked up and found to be
      ambiguous.  An error has already been reported.  */
   BOOL_BITFIELD ambiguous_p : 1;
+  /* Note: 4 free bits.  */
+  /* The location at which this token was found.  */
+  location_t location;
   /* The value associated with this token, if any.  */
   union cp_token_value {
     /* Used for CPP_NESTED_NAME_SPECIFIER and CPP_TEMPLATE_ID.  */
@@ -86,8 +94,6 @@
     /* Use for all other tokens.  */
     tree GTY((tag ("0"))) value;
   } GTY((desc ("(%1.type == CPP_TEMPLATE_ID) || (%1.type == CPP_NESTED_NAME_SPECIFIER)"))) u;
-  /* The location at which this token was found.  */
-  location_t location;
 } cp_token;
 
 /* We use a stack of token pointer for saving token sets.  */
@@ -97,8 +103,8 @@
 
 static cp_token eof_token =
 {
-  CPP_EOF, RID_MAX, 0, PRAGMA_NONE, 0, false, 0, { NULL },
-  0
+  CPP_EOF, RID_MAX, false, false, false, PRAGMA_NONE,
+  false, false, false, 0, { NULL }
 };
 
 /* The cp_lexer structure represents the C++ lexer.  It is responsible
@@ -401,14 +407,18 @@
 cp_lexer_get_preprocessor_token (cp_lexer *lexer, cp_token *token)
 {
   static int is_extern_c = 0;
+  unsigned char flags;
 
    /* Get a new token from the preprocessor.  */
   token->type
-    = c_lex_with_flags (&token->u.value, &token->location, &token->flags,
+    = c_lex_with_flags (&token->u.value, &token->location, &flags,
 			lexer == NULL ? 0 : C_LEX_RAW_STRINGS);
   token->keyword = RID_MAX;
   token->pragma_kind = PRAGMA_NONE;
   token->in_system_header = in_system_header;
+  token->flag_prev_white = (flags & PREV_WHITE) ? 1 : 0;
+  token->flag_digraph = (flags & DIGRAPH) ? 1 : 0;
+  token->flag_pure_zero = (flags & PURE_ZERO) ? 1 : 0;
 
   /* On some systems, some header files are surrounded by an
      implicit extern "C" block.  Set a flag in the token if it
@@ -7195,8 +7205,7 @@
   close_loc = expand_location (close_paren->location);
   token = cp_lexer_peek_nth_token (parser->lexer, 2);
 
-  if (token->type != CPP_SEMICOLON
-      || (token->flags & PREV_WHITE))
+  if (token->type != CPP_SEMICOLON || token->flag_prev_white)
     return;
 
   semi_loc =  expand_location (token->location);
@@ -9751,9 +9760,9 @@
   next_token = cp_lexer_peek_token (parser->lexer);
   next_token_2 = cp_lexer_peek_nth_token (parser->lexer, 2);
   if (next_token->type == CPP_OPEN_SQUARE
-      && next_token->flags & DIGRAPH
+      && next_token->flag_digraph
       && next_token_2->type == CPP_COLON
-      && !(next_token_2->flags & PREV_WHITE))
+      && !next_token_2->flag_prev_white)
     {
       cp_parser_parse_tentatively (parser);
       /* Change `:' into `::'.  */
@@ -15298,7 +15307,7 @@
   /* Look for the `0' token.  */
   token = cp_lexer_consume_token (parser->lexer);
   /* c_lex_with_flags marks a single digit '0' with PURE_ZERO.  */
-  if (token->type != CPP_NUMBER || !(token->flags & PURE_ZERO))
+  if (token->type != CPP_NUMBER || !token->flag_pure_zero)
     {
       cp_parser_error (parser,
 		       "invalid pure specifier (only `= 0' is allowed)");
@@ -17965,11 +17974,11 @@
   /* Check for the sequence `<::' in the original code. It would be lexed as
      `[:', where `[' is a digraph, and there is no whitespace before
      `:'.  */
-  if (token->type == CPP_OPEN_SQUARE && token->flags & DIGRAPH)
+  if (token->type == CPP_OPEN_SQUARE && token->flag_digraph)
     {
       cp_token *token2;
       token2 = cp_lexer_peek_nth_token (parser->lexer, n+1);
-      if (token2->type == CPP_COLON && !(token2->flags & PREV_WHITE))
+      if (token2->type == CPP_COLON && !token2->flag_prev_white)
 	return true;
     }
   return false;



More information about the Gcc-patches mailing list