This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Speed up _cpp_clean_line and _cpp_skip_block_comment


This patch does a bit of hand tuning on two hot spots in cpplex.c.
_cpp_skip_block_comment is just changed to use a local pointer - gcc
doesn't do store sinking (at least in this case), so we were writing
back buffer->cur on every character scanned.  _cpp_clean_line has its
main loop cloned to provide a short-circuit for the common case that
we don't have to make any edits to the line -- we were reading each
character and then writing it back to the same memory location.

I benchmarked this by creating a 128MB input file consisting entirely
of whitespace and /* */ comments in a random pattern, and running this
through cc1 -P -E.  According to kcachegrind:

                                before            after         change    %
_cpp_clean_line
  instructions           2 210 404 682    1 801 726 448    -399 678 234  18
  memory reads             516 097 743      240 090 189    -276 007 554  53
  memory writes            161 268 766       25 054 442    -136 214 334  84

_cpp_skip_block_comment
  instructions             758 363 288      589 357 957    -169 005 331  22
  memory reads             107 773 080      107 773 080               0   0
  memory writes            105 088 372       19 690 732    - 85 397 640  81

I don't think it's possible to get _cpp_skip_block_comment going any
faster, short of somehow managing to do word-sized memory reads.
There are further improvements possible to _cpp_clean_line but they're
going to make the code even messier, and I need to get back to machine
modes.

Bootstrapped i686-linux, no regressions.

zw

        * cpplex.c (_cpp_clean_line): In the common case of a line
        with no trigraphs and no \-newline, avoid writing to memory.
        (_cpp_skip_block_comment): Use a local 'cur' pointer instead
        of the buffer member.  Make c a uchar to avoid unnecessary
        sign extensions.

===================================================================
Index: cpplex.c
--- cpplex.c	2 Oct 2003 07:20:36 -0000	1.248
+++ cpplex.c	13 Oct 2003 18:30:59 -0000
@@ -114,7 +114,57 @@ _cpp_clean_line (cpp_reader *pfile)
 
   if (!buffer->from_stage3)
     {
-      d = (uchar *) s;
+      /* Short circuit for the common case of an un-escaped line with
+	 no trigraphs.  The primary win here is by not writing any
+	 data back to memory until we have to.  */
+      for (;;)
+	{
+	  c = *++s;
+	  if (c == '\n' || c == '\r')
+	    {
+	      d = (uchar *) s;
+
+	      if (s == buffer->rlimit)
+		goto done;
+
+	      /* DOS line ending? */
+	      if (c == '\r' && s[1] == '\n')
+		s++;
+
+	      if (s == buffer->rlimit)
+		goto done;
+
+	      /* check for escaped newline */
+	      p = d;
+	      while (p != buffer->next_line && is_nvspace (p[-1]))
+		p--;
+	      if (p == buffer->next_line || p[-1] != '\\')
+		goto done;
+
+	      /* Have an escaped newline; process it and proceed to
+		 the slow path.  */
+	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
+	      d = p - 2;
+	      buffer->next_line = p - 1;
+	      break;
+	    }
+	  if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
+	    {
+	      /* Have a trigraph.  We may or may not have to convert
+		 it.  Add a line note regardless, for -Wtrigraphs.  */
+	      add_line_note (buffer, s, s[2]);
+	      if (CPP_OPTION (pfile, trigraphs))
+		{
+		  /* We do, and that means we have to switch to the
+		     slow path.  */
+		  d = (uchar *) s;
+		  *d = _cpp_trigraph_map[s[2]];
+		  s += 2;
+		  break;
+		}
+	    }
+	}
+
 
       for (;;)
 	{
@@ -164,6 +214,7 @@ _cpp_clean_line (cpp_reader *pfile)
 	s++;
     }
 
+ done:
   *d = '\n';
   /* A sentinel note that should never be processed.  */
   add_line_note (buffer, d + 1, '\n');
@@ -266,43 +317,49 @@ bool
 _cpp_skip_block_comment (cpp_reader *pfile)
 {
   cpp_buffer *buffer = pfile->buffer;
-  cppchar_t c;
+  const uchar *cur = buffer->cur;
+  uchar c;
 
-  buffer->cur++;
-  if (*buffer->cur == '/')
-    buffer->cur++;
+  cur++;
+  if (*cur == '/')
+    cur++;
 
   for (;;)
     {
-      c = *buffer->cur++;
-
       /* People like decorating comments with '*', so check for '/'
 	 instead for efficiency.  */
+      c = *cur++;
+
       if (c == '/')
 	{
-	  if (buffer->cur[-2] == '*')
+	  if (cur[-2] == '*')
 	    break;
 
 	  /* Warn about potential nested comments, but not if the '/'
 	     comes immediately before the true comment delimiter.
 	     Don't bother to get it right across escaped newlines.  */
 	  if (CPP_OPTION (pfile, warn_comments)
-	      && buffer->cur[0] == '*' && buffer->cur[1] != '/')
-	    cpp_error_with_line (pfile, DL_WARNING,
-				 pfile->line, CPP_BUF_COL (buffer),
-				 "\"/*\" within comment");
+	      && cur[0] == '*' && cur[1] != '/')
+	    {
+	      buffer->cur = cur;
+	      cpp_error_with_line (pfile, DL_WARNING,
+				   pfile->line, CPP_BUF_COL (buffer),
+				   "\"/*\" within comment");
+	    }
 	}
       else if (c == '\n')
 	{
-	  buffer->cur--;
+	  buffer->cur = cur - 1;
 	  _cpp_process_line_notes (pfile, true);
 	  if (buffer->next_line >= buffer->rlimit)
 	    return true;
 	  _cpp_clean_line (pfile);
 	  pfile->line++;
+	  cur = buffer->cur;
 	}
     }
 
+  buffer->cur = cur;
   _cpp_process_line_notes (pfile, true);
   return false;
 }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]