This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: cpplib: Be safe in presence of symlinks


Zack Weinberg wrote:-

> What happens if I write -I "" ?  Right now that's equivalent to -I.

Fixed below, thanks.  Is this actually what we want to do, though?
append_include_chain and friends are a bit messy.  They leak memory,
too, if a path doesn't stat() or if it's not a directory.

> On the other hand,
> 
> $ echo blah | ./cpp0
> # 1 "."
> blah
> $

What should we do here?  Have something pretty like # 1 "<stdin>"?  The
only problem I can see is if we have a file called "<stdin>".  That's
probably not worth worrying about.

> It's not hard.  One thing I don't know is, do all systems that have
> lstat declare it in <sys/stat.h>?  If not, you may have to add a
> NEED_DECLARATION_LSTAT check as well as plain HAVE_LSTAT.

W.R. Stevens seems to think it's enough.  Let's see if anyone complains.

> Maybe you should use stat if there's no lstat, because of the example
> below.

Yup, I've done this.

> #include <stdio.h/../stdlib.h>
> 
> and it would be treated equivalent to #include <stdlib.h>.  I think
> you've now made this an error, which is fine by me, but does it get
> reported, and if so where, and is the error message useful?  One would
> hope for something with "stdio.h" and "Not a directory" in it.

We can't report an error in _cpp_simplify_pathname, because it's
legitimate to pass it paths that don't exist.  So we need to leave it
to the open () call.  This actually turns out quite well; it reports

/tmp/test.c:1:31: stdlib.h/../stdio.h: not a directory

To get anything better is not worth the effort IMO.

The code below also hooks up append_include_chain to notice if
_cpp_simplify_pathname had a stat error, and doesn't bother with its
own stat on the full path if so.

> Now I look at it, that example is a convincing reason to stop blindly
> simplifying paths, all by itself. :-)

Quite.  Here's a new patch, would you look at it?  If you're OK with it,
I'll commit it to mainline (if and when I can bootstrap).  I've not
included the files regenerated by autoconf and autoheader.

Thanks,

Neil.

	* configure.in: Check for lstat.
	* config.in: Regenerate.
	* configure: Regenerate.
	* cpplib.c (do_line): Don't simplify #line paths.
	* cppinit.c (append_include_chain): Convert empty paths into
	".".  Don't re-stat if _cpp_simplify_pathname had an error.
	* cppfiles.c (remove_component_p): New function.
	(handle_missing_header): Don't simplify the path.
	(_cpp_simplify_pathname): Don't simplify VMS paths.  Return
	the empty path untouched.  Don't leave a trailing '/'.
				
Index: configure.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/configure.in,v
retrieving revision 1.505
diff -u -p -c -r1.505 configure.in
*** configure.in	2001/03/28 05:24:17	1.505
--- configure.in	2001/04/01 13:59:00
*************** dnl gcc_AC_C_ENUM_BF_UNSIGNED
*** 547,553 ****
  AC_CHECK_FUNCS(strtoul bsearch putenv popen \
  	strchr strrchr kill getrlimit setrlimit atoll atoq \
  	sysconf isascii gettimeofday strsignal putc_unlocked fputc_unlocked \
! 	fputs_unlocked getrusage iconv nl_langinfo)
  
  AC_CHECK_TYPE(ssize_t, int)
  
--- 547,553 ----
  AC_CHECK_FUNCS(strtoul bsearch putenv popen \
  	strchr strrchr kill getrlimit setrlimit atoll atoq \
  	sysconf isascii gettimeofday strsignal putc_unlocked fputc_unlocked \
! 	fputs_unlocked getrusage iconv nl_langinfo lstat)
  
  AC_CHECK_TYPE(ssize_t, int)
  
Index: cppfiles.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppfiles.c,v
retrieving revision 1.114
diff -u -p -c -r1.114 cppfiles.c
*** cppfiles.c	2001/03/16 05:19:46	1.114
--- cppfiles.c	2001/04/01 13:59:03
*************** static int report_missing_guard		PARAMS 
*** 101,106 ****
--- 101,107 ----
  static splay_tree_node find_or_create_entry PARAMS ((cpp_reader *,
  						     const char *));
  static void handle_missing_header PARAMS ((cpp_reader *, const char *, int));
+ static int remove_component_p	PARAMS ((const char *));
  
  /* Set up the splay tree we use to store information about all the
     file names seen in this compilation.  We also have entries for each
*************** _cpp_never_reread (file)
*** 155,161 ****
  }
  
  /* Lookup a filename, which is simplified after making a copy, and
!    create an entry if none exists.  */
  static splay_tree_node
  find_or_create_entry (pfile, fname)
       cpp_reader *pfile;
--- 156,163 ----
  }
  
  /* Lookup a filename, which is simplified after making a copy, and
!    create an entry if none exists.  errno is nonzero iff a (reported)
!    stat() error occurred during simplification.  */
  static splay_tree_node
  find_or_create_entry (pfile, fname)
       cpp_reader *pfile;
*************** open_file (pfile, filename)
*** 208,213 ****
--- 210,218 ----
    splay_tree_node nd = find_or_create_entry (pfile, filename);
    struct include_file *file = (struct include_file *) nd->value;
  
+   if (errno)
+     file->fd = -2;
+ 
    /* Don't retry opening if we failed previously.  */
    if (file->fd == -2)
      return 0;
*************** handle_missing_header (pfile, fname, ang
*** 639,645 ****
  	      p[len++] = '/';
  	    }
  	  memcpy (p + len, fname, fname_len + 1);
- 	  _cpp_simplify_pathname (p);
  	  deps_add_dep (pfile->deps, p);
  	}
      }
--- 644,649 ----
*************** remap_filename (pfile, name, loc)
*** 1002,1007 ****
--- 1006,1031 ----
    return name;
  }
  
+ /* Returns true if it is safe to remove the final component of path,
+    when it is followed by a ".." component.  We use lstat to avoid
+    symlinks if we have it.  If not, we can still catch errors with
+    stat ().  */
+ static int
+ remove_component_p (path)
+      const char *path;
+ {
+   struct stat s;
+   int result;
+ 
+ #ifdef HAVE_LSTAT
+   result = lstat (path, &s);
+ #else
+   result = stat (path, &s);
+ #endif
+ 
+   return result == 0 && S_ISDIR (s.st_mode);
+ }
+ 
  /* Simplify a path name in place, deleting redundant components.  This
     reduces OS overhead and guarantees that equivalent paths compare
     the same (modulo symlinks).
*************** remap_filename (pfile, name, loc)
*** 1012,1136 ****
     foo//bar		foo/bar
     /../quux		/quux
     //quux		//quux  (POSIX allows leading // as a namespace escape)
  
-    Guarantees no trailing slashes. All transforms reduce the length
-    of the string.  Returns PATH;
-  */
  char *
  _cpp_simplify_pathname (path)
      char *path;
  {
!     char *from, *to;
!     char *base;
!     int absolute = 0;
  
  #if defined (HAVE_DOS_BASED_FILE_SYSTEM)
!     /* Convert all backslashes to slashes. */
!     for (from = path; *from; from++)
! 	if (*from == '\\') *from = '/';
      
!     /* Skip over leading drive letter if present. */
!     if (ISALPHA (path[0]) && path[1] == ':')
! 	from = to = &path[2];
!     else
! 	from = to = path;
! #else
      from = to = path;
  #endif
      
!     /* Remove redundant initial /s.  */
!     if (*from == '/')
      {
! 	absolute = 1;
! 	to++;
! 	from++;
! 	if (*from == '/')
  	{
! 	    if (*++from == '/')
! 		/* 3 or more initial /s are equivalent to 1 /.  */
! 		while (*++from == '/');
! 	    else
! 		/* On some hosts // differs from /; Posix allows this.  */
! 		to++;
  	}
      }
!     base = to;
!     
!     for (;;)
      {
! 	while (*from == '/')
! 	    from++;
  
! 	if (from[0] == '.' && from[1] == '/')
! 	    from += 2;
! 	else if (from[0] == '.' && from[1] == '\0')
! 	    goto done;
! 	else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
  	{
! 	    if (base == to)
! 	    {
! 		if (absolute)
! 		    from += 3;
! 		else
! 		{
! 		    *to++ = *from++;
! 		    *to++ = *from++;
! 		    *to++ = *from++;
! 		    base = to;
! 		}
! 	    }
! 	    else
  	    {
! 		to -= 2;
! 		while (to > base && *to != '/') to--;
! 		if (*to == '/')
! 		    to++;
! 		from += 3;
  	    }
! 	}
! 	else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
! 	{
! 	    if (base == to)
  	    {
! 		if (!absolute)
  		{
! 		    *to++ = *from++;
! 		    *to++ = *from++;
  		}
! 	    }
! 	    else
! 	    {
! 		to -= 2;
! 		while (to > base && *to != '/') to--;
! 		if (*to == '/')
! 		    to++;
! 	    }
! 	    goto done;
! 	}
! 	else
! 	    /* Copy this component and trailing /, if any.  */
! 	    while ((*to++ = *from++) != '/')
! 	    {
! 		if (!to[-1])
  		{
! 		    to--;
! 		    goto done;
  		}
  	    }
! 	
      }
-     
-  done:
-     /* Trim trailing slash */
-     if (to[0] == '/' && (!absolute || to > path+1))
- 	to--;
- 
-     /* Change the empty string to "." so that stat() on the result
-        will always work. */
-     if (to == path)
-       *to++ = '.';
      
!     *to = '\0';
! 
!     return path;
  }
--- 1036,1158 ----
     foo//bar		foo/bar
     /../quux		/quux
     //quux		//quux  (POSIX allows leading // as a namespace escape)
+ 
+    Guarantees no trailing slashes.  All transforms reduce the length
+    of the string.  Returns PATH.  errno is 0 if no error occurred;
+    nonzero if an error occurred when using stat () or lstat ().  */
  
  char *
  _cpp_simplify_pathname (path)
      char *path;
  {
! #ifndef VMS
!   char *from, *to;
!   char *base, *orig_base;
!   int absolute = 0;
! 
!   errno = 0;
!   /* Don't overflow the empty path by putting a '.' in it below.  */
!   if (*path == '\0')
!     return path;
  
  #if defined (HAVE_DOS_BASED_FILE_SYSTEM)
!   /* Convert all backslashes to slashes. */
!   for (from = path; *from; from++)
!     if (*from == '\\') *from = '/';
      
!   /* Skip over leading drive letter if present. */
!   if (ISALPHA (path[0]) && path[1] == ':')
!     from = to = &path[2];
!   else
      from = to = path;
+ #else
+   from = to = path;
  #endif
      
!   /* Remove redundant leading /s.  */
!   if (*from == '/')
      {
!       absolute = 1;
!       to++;
!       from++;
!       if (*from == '/')
  	{
! 	  if (*++from == '/')
! 	    /* 3 or more initial /s are equivalent to 1 /.  */
! 	    while (*++from == '/');
! 	  else
! 	    /* On some hosts // differs from /; Posix allows this.  */
! 	    to++;
  	}
      }
! 
!   base = orig_base = to;
!   for (;;)
      {
!       int move_base = 0;
! 
!       while (*from == '/')
! 	from++;
! 
!       if (*from == '\0')
! 	break;
  
!       if (*from == '.')
  	{
! 	  if (from[1] == '\0')
! 	    break;
! 	  if (from[1] == '/')
  	    {
! 	      from += 2;
! 	      continue;
  	    }
! 	  else if (from[1] == '.' && (from[2] == '/' || from[2] == '\0'))
  	    {
! 	      /* Don't simplify if there was no previous component.  */
! 	      if (absolute && orig_base == to)
  		{
! 		  from += 2;
! 		  continue;
  		}
! 	      /* Don't simplify if the previous component was "../",
! 		 or if an error has already occurred with (l)stat.  */
! 	      if (base != to && errno == 0)
  		{
! 		  /* We don't back up if it's a symlink.  */
! 		  *to = '\0';
! 		  if (remove_component_p (path))
! 		    {
! 		      while (to > base && *to != '/')
! 			to--;
! 		      from += 2;
! 		      continue;
! 		    }
  		}
+ 	      move_base = 1;
  	    }
! 	}
! 
!       /* Add the component separator.  */
!       if (to > orig_base)
! 	*to++ = '/';
! 
!       /* Copy this component until the trailing null or '/'.  */
!       while (*from != '\0' && *from != '/')
! 	*to++ = *from++;
! 
!       if (move_base)
! 	base = to;
      }
      
!   /* Change the empty string to "." so that it is not treated as stdin.
!      Null terminate.  */
!   if (to == path)
!     *to++ = '.';
!   *to = '\0';
! 
!   return path;
! #else /* VMS  */
!   errno = 0;
!   return path;
! #endif /* !VMS  */
  }
Index: cppinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppinit.c,v
retrieving revision 1.156
diff -u -p -c -r1.156 cppinit.c
*** cppinit.c	2001/03/15 07:54:54	1.156
--- cppinit.c	2001/04/01 13:59:10
*************** append_include_chain (pfile, dir, path, 
*** 211,220 ****
    struct stat st;
    unsigned int len;
  
    _cpp_simplify_pathname (dir);
!   if (stat (dir, &st))
      {
!       /* Dirs that don't exist are silently ignored. */
        if (errno != ENOENT)
  	cpp_notice_from_errno (pfile, dir);
        else if (CPP_OPTION (pfile, verbose))
--- 211,225 ----
    struct stat st;
    unsigned int len;
  
+   if (*dir == '\0')
+     dir = xstrdup (".");
    _cpp_simplify_pathname (dir);
!   if (errno == 0)
!     stat (dir, &st);
! 
!   if (errno)
      {
!       /* Dirs that don't exist are silently ignored.  */
        if (errno != ENOENT)
  	cpp_notice_from_errno (pfile, dir);
        else if (CPP_OPTION (pfile, verbose))
Index: cpplib.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cpplib.c,v
retrieving revision 1.245
diff -u -p -c -r1.245 cpplib.c
*** cpplib.c	2001/03/27 15:31:45	1.245
--- cpplib.c	2001/04/01 13:59:12
*************** do_line (pfile)
*** 729,740 ****
    cpp_get_token (pfile, &token);
    if (token.type == CPP_STRING)
      {
!       char *fname;
!       unsigned int len = token.val.str.len + 1;
! 
!       fname = (char *) _cpp_pool_alloc (&pfile->ident_pool, len);
!       memcpy (fname, token.val.str.text, len);
!       _cpp_simplify_pathname (fname);
  
        /* Only accept flags for the # 55 form.  */
        if (! pfile->state.line_extension)
--- 729,735 ----
    cpp_get_token (pfile, &token);
    if (token.type == CPP_STRING)
      {
!       const char *fname = (const char *) token.val.str.text;
  
        /* Only accept flags for the # 55 form.  */
        if (! pfile->state.line_extension)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]