cpplib: final symlink patch

Neil Booth neil@daikokuya.demon.co.uk
Thu Apr 5 22:43:00 GMT 2001


This is the final patch to fix the incorrect simplification of paths
that contain symlinks.  I'll apply this to mainline and let it stew there
for a while - I'm not confident enough to apply it to the branch yet.

It has bootstrapped the branch for me.  I can't get mainline to
bootstrap, but since CPP is the same for both, this shouldn't
cause a problem.

Neil.

	* configure.in: Add check for lstat.
	* configure, config.in: Regenerate.
	* cppinit.c (append_include_chain): Make empty path ".".
	* cpplib.c (do_line): Don't simplify #line paths.
	* cppfiles.c (remove_component_p): New function.
	(find_or_create_entry): Acknowledge stat () errors during
	path simplification.
	(open_file): Treat a stat () error reported during path
	simplification as a failed open.
	(handle_missing_header): Don't simplify paths.
	(_cpp_simplify_pathname): Don't simplify VMS paths.  Return
	the empty path untouched.  Don't leave a trailing '/'.

Index: configure.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/configure.in,v
retrieving revision 1.483.2.9
diff -u -p -r1.483.2.9 configure.in
--- configure.in	2001/04/03 09:02:25	1.483.2.9
+++ configure.in	2001/04/06 05:36:10
@@ -543,7 +543,7 @@ fi
 AC_CHECK_FUNCS(strtoul bsearch putenv popen bcopy \
 	strchr strrchr kill getrlimit setrlimit atoll atoq \
 	sysconf isascii gettimeofday strsignal putc_unlocked fputc_unlocked \
-	fputs_unlocked getrusage iconv nl_langinfo)
+	fputs_unlocked getrusage iconv nl_langinfo lstat)
 
 AC_CHECK_TYPE(ssize_t, int)
 
Index: cppfiles.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppfiles.c,v
retrieving revision 1.101.4.6
diff -u -p -r1.101.4.6 cppfiles.c
--- cppfiles.c	2001/04/05 06:35:38	1.101.4.6
+++ cppfiles.c	2001/04/06 05:36:13
@@ -101,6 +101,7 @@ static int report_missing_guard		PARAMS 
 static splay_tree_node find_or_create_entry PARAMS ((cpp_reader *,
 						     const char *));
 static void handle_missing_header PARAMS ((cpp_reader *, const char *, int));
+static int remove_component_p	PARAMS ((const char *));
 
 /* Set up the splay tree we use to store information about all the
    file names seen in this compilation.  We also have entries for each
@@ -155,7 +156,8 @@ _cpp_never_reread (file)
 }
 
 /* Lookup a filename, which is simplified after making a copy, and
-   create an entry if none exists.  */
+   create an entry if none exists.  errno is nonzero iff a (reported)
+   stat() error occurred during simplification.  */
 static splay_tree_node
 find_or_create_entry (pfile, fname)
      cpp_reader *pfile;
@@ -208,6 +210,9 @@ open_file (pfile, filename)
   splay_tree_node nd = find_or_create_entry (pfile, filename);
   struct include_file *file = (struct include_file *) nd->value;
 
+  if (errno)
+    file->fd = -2;
+
   /* Don't retry opening if we failed previously.  */
   if (file->fd == -2)
     return 0;
@@ -643,7 +648,6 @@ handle_missing_header (pfile, fname, ang
 	      p[len++] = '/';
 	    }
 	  memcpy (p + len, fname, fname_len + 1);
-	  _cpp_simplify_pathname (p);
 	  deps_add_dep (pfile->deps, p);
 	}
     }
@@ -1006,6 +1010,26 @@ remap_filename (pfile, name, loc)
   return name;
 }
 
+/* Returns true if it is safe to remove the final component of path,
+   when it is followed by a ".." component.  We use lstat to avoid
+   symlinks if we have it.  If not, we can still catch errors with
+   stat ().  */
+static int
+remove_component_p (path)
+     const char *path;
+{
+  struct stat s;
+  int result;
+
+#ifdef HAVE_LSTAT
+  result = lstat (path, &s);
+#else
+  result = stat (path, &s);
+#endif
+
+  return result == 0 && S_ISDIR (s.st_mode);
+}
+
 /* Simplify a path name in place, deleting redundant components.  This
    reduces OS overhead and guarantees that equivalent paths compare
    the same (modulo symlinks).
@@ -1016,125 +1040,123 @@ remap_filename (pfile, name, loc)
    foo//bar		foo/bar
    /../quux		/quux
    //quux		//quux  (POSIX allows leading // as a namespace escape)
+
+   Guarantees no trailing slashes.  All transforms reduce the length
+   of the string.  Returns PATH.  errno is 0 if no error occurred;
+   nonzero if an error occurred when using stat () or lstat ().  */
 
-   Guarantees no trailing slashes. All transforms reduce the length
-   of the string.  Returns PATH;
- */
 char *
 _cpp_simplify_pathname (path)
     char *path;
 {
-    char *from, *to;
-    char *base;
-    int absolute = 0;
+#ifndef VMS
+  char *from, *to;
+  char *base, *orig_base;
+  int absolute = 0;
+
+  errno = 0;
+  /* Don't overflow the empty path by putting a '.' in it below.  */
+  if (*path == '\0')
+    return path;
 
 #if defined (HAVE_DOS_BASED_FILE_SYSTEM)
-    /* Convert all backslashes to slashes. */
-    for (from = path; *from; from++)
-	if (*from == '\\') *from = '/';
+  /* Convert all backslashes to slashes. */
+  for (from = path; *from; from++)
+    if (*from == '\\') *from = '/';
     
-    /* Skip over leading drive letter if present. */
-    if (ISALPHA (path[0]) && path[1] == ':')
-	from = to = &path[2];
-    else
-	from = to = path;
-#else
+  /* Skip over leading drive letter if present. */
+  if (ISALPHA (path[0]) && path[1] == ':')
+    from = to = &path[2];
+  else
     from = to = path;
+#else
+  from = to = path;
 #endif
     
-    /* Remove redundant initial /s.  */
-    if (*from == '/')
+  /* Remove redundant leading /s.  */
+  if (*from == '/')
     {
-	absolute = 1;
-	to++;
-	from++;
-	if (*from == '/')
+      absolute = 1;
+      to++;
+      from++;
+      if (*from == '/')
 	{
-	    if (*++from == '/')
-		/* 3 or more initial /s are equivalent to 1 /.  */
-		while (*++from == '/');
-	    else
-		/* On some hosts // differs from /; Posix allows this.  */
-		to++;
+	  if (*++from == '/')
+	    /* 3 or more initial /s are equivalent to 1 /.  */
+	    while (*++from == '/');
+	  else
+	    /* On some hosts // differs from /; Posix allows this.  */
+	    to++;
 	}
     }
-    base = to;
-    
-    for (;;)
+
+  base = orig_base = to;
+  for (;;)
     {
-	while (*from == '/')
-	    from++;
+      int move_base = 0;
+
+      while (*from == '/')
+	from++;
+
+      if (*from == '\0')
+	break;
 
-	if (from[0] == '.' && from[1] == '/')
-	    from += 2;
-	else if (from[0] == '.' && from[1] == '\0')
-	    goto done;
-	else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
+      if (*from == '.')
 	{
-	    if (base == to)
-	    {
-		if (absolute)
-		    from += 3;
-		else
-		{
-		    *to++ = *from++;
-		    *to++ = *from++;
-		    *to++ = *from++;
-		    base = to;
-		}
-	    }
-	    else
+	  if (from[1] == '\0')
+	    break;
+	  if (from[1] == '/')
 	    {
-		to -= 2;
-		while (to > base && *to != '/') to--;
-		if (*to == '/')
-		    to++;
-		from += 3;
+	      from += 2;
+	      continue;
 	    }
-	}
-	else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
-	{
-	    if (base == to)
+	  else if (from[1] == '.' && (from[2] == '/' || from[2] == '\0'))
 	    {
-		if (!absolute)
+	      /* Don't simplify if there was no previous component.  */
+	      if (absolute && orig_base == to)
 		{
-		    *to++ = *from++;
-		    *to++ = *from++;
+		  from += 2;
+		  continue;
 		}
-	    }
-	    else
-	    {
-		to -= 2;
-		while (to > base && *to != '/') to--;
-		if (*to == '/')
-		    to++;
-	    }
-	    goto done;
-	}
-	else
-	    /* Copy this component and trailing /, if any.  */
-	    while ((*to++ = *from++) != '/')
-	    {
-		if (!to[-1])
+	      /* Don't simplify if the previous component was "../",
+		 or if an error has already occurred with (l)stat.  */
+	      if (base != to && errno == 0)
 		{
-		    to--;
-		    goto done;
+		  /* We don't back up if it's a symlink.  */
+		  *to = '\0';
+		  if (remove_component_p (path))
+		    {
+		      while (to > base && *to != '/')
+			to--;
+		      from += 2;
+		      continue;
+		    }
 		}
+	      move_base = 1;
 	    }
-	
+	}
+
+      /* Add the component separator.  */
+      if (to > orig_base)
+	*to++ = '/';
+
+      /* Copy this component until the trailing null or '/'.  */
+      while (*from != '\0' && *from != '/')
+	*to++ = *from++;
+
+      if (move_base)
+	base = to;
     }
-    
- done:
-    /* Trim trailing slash */
-    if (to[0] == '/' && (!absolute || to > path+1))
-	to--;
-
-    /* Change the empty string to "." so that stat() on the result
-       will always work. */
-    if (to == path)
-      *to++ = '.';
     
-    *to = '\0';
-
-    return path;
+  /* Change the empty string to "." so that it is not treated as stdin.
+     Null terminate.  */
+  if (to == path)
+    *to++ = '.';
+  *to = '\0';
+
+  return path;
+#else /* VMS  */
+  errno = 0;
+  return path;
+#endif /* !VMS  */
 }
Index: cppinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppinit.c,v
retrieving revision 1.147.2.6
diff -u -p -r1.147.2.6 cppinit.c
--- cppinit.c	2001/03/27 06:39:39	1.147.2.6
+++ cppinit.c	2001/04/06 05:36:21
@@ -211,10 +211,12 @@ append_include_chain (pfile, dir, path, 
   struct stat st;
   unsigned int len;
 
+  if (*dir == '\0')
+    dir = xstrdup (".");
   _cpp_simplify_pathname (dir);
   if (stat (dir, &st))
     {
-      /* Dirs that don't exist are silently ignored. */
+      /* Dirs that don't exist are silently ignored.  */
       if (errno != ENOENT)
 	cpp_notice_from_errno (pfile, dir);
       else if (CPP_OPTION (pfile, verbose))
Index: cpplib.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cpplib.c,v
retrieving revision 1.239.2.3
diff -u -p -r1.239.2.3 cpplib.c
--- cpplib.c	2001/03/27 06:39:40	1.239.2.3
+++ cpplib.c	2001/04/06 05:36:24
@@ -730,12 +730,7 @@ do_line (pfile)
   cpp_get_token (pfile, &token);
   if (token.type == CPP_STRING)
     {
-      char *fname;
-      unsigned int len = token.val.str.len + 1;
-
-      fname = (char *) _cpp_pool_alloc (&pfile->ident_pool, len);
-      memcpy (fname, token.val.str.text, len);
-      _cpp_simplify_pathname (fname);
+      const char *fname = (const char *) token.val.str.text;
 
       /* Only accept flags for the # 55 form.  */
       if (! pfile->state.line_extension)



More information about the Gcc-patches mailing list