This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: cpplib: Be safe in presence of symlinks
Zack Weinberg wrote:-
> What happens if I write -I "" ? Right now that's equivalent to -I.
Fixed below, thanks. Is this actually what we want to do, though?
append_include_chain and friends are a bit messy. They leak memory,
too, if a path doesn't stat() or if it's not a directory.
> On the other hand,
>
> $ echo blah | ./cpp0
> # 1 "."
> blah
> $
What should we do here? Have something pretty like # 1 "<stdin>"? The
only problem I can see is if we have a file called "<stdin>". That's
probably not worth worrying about.
> It's not hard. One thing I don't know is, do all systems that have
> lstat declare it in <sys/stat.h>? If not, you may have to add a
> NEED_DECLARATION_LSTAT check as well as plain HAVE_LSTAT.
W.R. Stevens seems to think it's enough. Let's see if anyone complains.
> Maybe you should use stat if there's no lstat, because of the example
> below.
Yup, I've done this.
> #include <stdio.h/../stdlib.h>
>
> and it would be treated equivalent to #include <stdlib.h>. I think
> you've now made this an error, which is fine by me, but does it get
> reported, and if so where, and is the error message useful? One would
> hope for something with "stdio.h" and "Not a directory" in it.
We can't report an error in _cpp_simplify_pathname, because it's
legitimate there to use non-existing paths. So we need to leave it
to the open () call. This actually turns out quite well; it reports
/tmp/test.c:1:31: stdlib.h/../stdio.h: not a directory
To get anything better is not worth the effort IMO.
The code below also hooks up append_include_chain to notice if
_cpp_simplify_pathname had a stat error, and doesn't bother with its
own stat on the full path if so.
> Now I look at it, that example is a convincing reason to stop blindly
> simplifying paths, all by itself. :-)
Quite. Here's a new patch, would you look at it? If you're OK with it,
I'll commit it to mainline (if and when I can bootstrap). I've not
included the files regenerated by autoconf and autoheader.
Thanks,
Neil.
* configure.in: Check for lstat.
* config.in: Regenerate.
* configure: Regenerate.
* cpplib.c (do_line): Don't simplify #line paths.
* cppinit.c (append_include_chain): Convert empty paths into
".". Don't re-stat if _cpp_simplify_pathname had an error.
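* cppfiles.c (remove_component_p): New function.
(find_or_create_entry): Document that errno is nonzero iff a
stat error occurred during simplification.
(open_file): Treat a stat error during simplification as a
failed open.
(handle_missing_header): Don't simplify the path.
(_cpp_simplify_pathname): Don't simplify VMS paths. Return
the empty path untouched. Don't leave a trailing '/'.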
Index: configure.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/configure.in,v
retrieving revision 1.505
diff -u -p -c -r1.505 configure.in
*** configure.in 2001/03/28 05:24:17 1.505
--- configure.in 2001/04/01 13:59:00
*************** dnl gcc_AC_C_ENUM_BF_UNSIGNED
*** 547,553 ****
AC_CHECK_FUNCS(strtoul bsearch putenv popen \
strchr strrchr kill getrlimit setrlimit atoll atoq \
sysconf isascii gettimeofday strsignal putc_unlocked fputc_unlocked \
! fputs_unlocked getrusage iconv nl_langinfo)
AC_CHECK_TYPE(ssize_t, int)
--- 547,553 ----
AC_CHECK_FUNCS(strtoul bsearch putenv popen \
strchr strrchr kill getrlimit setrlimit atoll atoq \
sysconf isascii gettimeofday strsignal putc_unlocked fputc_unlocked \
! fputs_unlocked getrusage iconv nl_langinfo lstat)
AC_CHECK_TYPE(ssize_t, int)
Index: cppfiles.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppfiles.c,v
retrieving revision 1.114
diff -u -p -c -r1.114 cppfiles.c
*** cppfiles.c 2001/03/16 05:19:46 1.114
--- cppfiles.c 2001/04/01 13:59:03
*************** static int report_missing_guard PARAMS
*** 101,106 ****
--- 101,107 ----
static splay_tree_node find_or_create_entry PARAMS ((cpp_reader *,
const char *));
static void handle_missing_header PARAMS ((cpp_reader *, const char *, int));
+ static int remove_component_p PARAMS ((const char *));
/* Set up the splay tree we use to store information about all the
file names seen in this compilation. We also have entries for each
*************** _cpp_never_reread (file)
*** 155,161 ****
}
/* Lookup a filename, which is simplified after making a copy, and
! create an entry if none exists. */
static splay_tree_node
find_or_create_entry (pfile, fname)
cpp_reader *pfile;
--- 156,163 ----
}
/* Lookup a filename, which is simplified after making a copy, and
! create an entry if none exists. errno is nonzero iff a (reported)
! stat() error occurred during simplification. */
static splay_tree_node
find_or_create_entry (pfile, fname)
cpp_reader *pfile;
*************** open_file (pfile, filename)
*** 208,213 ****
--- 210,218 ----
splay_tree_node nd = find_or_create_entry (pfile, filename);
struct include_file *file = (struct include_file *) nd->value;
+ if (errno)
+ file->fd = -2;
+
/* Don't retry opening if we failed previously. */
if (file->fd == -2)
return 0;
*************** handle_missing_header (pfile, fname, ang
*** 639,645 ****
p[len++] = '/';
}
memcpy (p + len, fname, fname_len + 1);
- _cpp_simplify_pathname (p);
deps_add_dep (pfile->deps, p);
}
}
--- 644,649 ----
*************** remap_filename (pfile, name, loc)
*** 1002,1007 ****
--- 1006,1031 ----
return name;
}
+ /* Returns true if it is safe to remove the final component of path,
+ when it is followed by a ".." component. We use lstat to avoid
+ symlinks if we have it. If not, we can still catch errors with
+ stat (). */
+ static int
+ remove_component_p (path)
+ const char *path;
+ {
+ struct stat s;
+ int result;
+
+ #ifdef HAVE_LSTAT
+ result = lstat (path, &s);
+ #else
+ result = stat (path, &s);
+ #endif
+
+ return result == 0 && S_ISDIR (s.st_mode);
+ }
+
/* Simplify a path name in place, deleting redundant components. This
reduces OS overhead and guarantees that equivalent paths compare
the same (modulo symlinks).
*************** remap_filename (pfile, name, loc)
*** 1012,1136 ****
foo//bar foo/bar
/../quux /quux
//quux //quux (POSIX allows leading // as a namespace escape)
- Guarantees no trailing slashes. All transforms reduce the length
- of the string. Returns PATH;
- */
char *
_cpp_simplify_pathname (path)
char *path;
{
! char *from, *to;
! char *base;
! int absolute = 0;
#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
! /* Convert all backslashes to slashes. */
! for (from = path; *from; from++)
! if (*from == '\\') *from = '/';
! /* Skip over leading drive letter if present. */
! if (ISALPHA (path[0]) && path[1] == ':')
! from = to = &path[2];
! else
! from = to = path;
! #else
from = to = path;
#endif
! /* Remove redundant initial /s. */
! if (*from == '/')
{
! absolute = 1;
! to++;
! from++;
! if (*from == '/')
{
! if (*++from == '/')
! /* 3 or more initial /s are equivalent to 1 /. */
! while (*++from == '/');
! else
! /* On some hosts // differs from /; Posix allows this. */
! to++;
}
}
! base = to;
!
! for (;;)
{
! while (*from == '/')
! from++;
! if (from[0] == '.' && from[1] == '/')
! from += 2;
! else if (from[0] == '.' && from[1] == '\0')
! goto done;
! else if (from[0] == '.' && from[1] == '.' && from[2] == '/')
{
! if (base == to)
! {
! if (absolute)
! from += 3;
! else
! {
! *to++ = *from++;
! *to++ = *from++;
! *to++ = *from++;
! base = to;
! }
! }
! else
{
! to -= 2;
! while (to > base && *to != '/') to--;
! if (*to == '/')
! to++;
! from += 3;
}
! }
! else if (from[0] == '.' && from[1] == '.' && from[2] == '\0')
! {
! if (base == to)
{
! if (!absolute)
{
! *to++ = *from++;
! *to++ = *from++;
}
! }
! else
! {
! to -= 2;
! while (to > base && *to != '/') to--;
! if (*to == '/')
! to++;
! }
! goto done;
! }
! else
! /* Copy this component and trailing /, if any. */
! while ((*to++ = *from++) != '/')
! {
! if (!to[-1])
{
! to--;
! goto done;
}
}
!
}
-
- done:
- /* Trim trailing slash */
- if (to[0] == '/' && (!absolute || to > path+1))
- to--;
-
- /* Change the empty string to "." so that stat() on the result
- will always work. */
- if (to == path)
- *to++ = '.';
! *to = '\0';
!
! return path;
}
--- 1036,1158 ----
foo//bar foo/bar
/../quux /quux
//quux //quux (POSIX allows leading // as a namespace escape)
+
+ Guarantees no trailing slashes. All transforms reduce the length
+ of the string. Returns PATH. errno is 0 if no error occurred;
+ nonzero if an error occurred when using stat () or lstat (). */
char *
_cpp_simplify_pathname (path)
char *path;
{
! #ifndef VMS
! char *from, *to;
! char *base, *orig_base;
! int absolute = 0;
!
! errno = 0;
! /* Don't overflow the empty path by putting a '.' in it below. */
! if (*path == '\0')
! return path;
#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
! /* Convert all backslashes to slashes. */
! for (from = path; *from; from++)
! if (*from == '\\') *from = '/';
! /* Skip over leading drive letter if present. */
! if (ISALPHA (path[0]) && path[1] == ':')
! from = to = &path[2];
! else
from = to = path;
+ #else
+ from = to = path;
#endif
! /* Remove redundant leading /s. */
! if (*from == '/')
{
! absolute = 1;
! to++;
! from++;
! if (*from == '/')
{
! if (*++from == '/')
! /* 3 or more initial /s are equivalent to 1 /. */
! while (*++from == '/');
! else
! /* On some hosts // differs from /; Posix allows this. */
! to++;
}
}
!
! base = orig_base = to;
! for (;;)
{
! int move_base = 0;
!
! while (*from == '/')
! from++;
!
! if (*from == '\0')
! break;
! if (*from == '.')
{
! if (from[1] == '\0')
! break;
! if (from[1] == '/')
{
! from += 2;
! continue;
}
! else if (from[1] == '.' && (from[2] == '/' || from[2] == '\0'))
{
! /* Don't simplify if there was no previous component. */
! if (absolute && orig_base == to)
{
! from += 2;
! continue;
}
! /* Don't simplify if the previous component was "../",
! or if an error has already occurred with (l)stat. */
! if (base != to && errno == 0)
{
! /* We don't back up if it's a symlink. */
! *to = '\0';
! if (remove_component_p (path))
! {
! while (to > base && *to != '/')
! to--;
! from += 2;
! continue;
! }
}
+ move_base = 1;
}
! }
!
! /* Add the component separator. */
! if (to > orig_base)
! *to++ = '/';
!
! /* Copy this component until the trailing null or '/'. */
! while (*from != '\0' && *from != '/')
! *to++ = *from++;
!
! if (move_base)
! base = to;
}
! /* Change the empty string to "." so that it is not treated as stdin.
! Null terminate. */
! if (to == path)
! *to++ = '.';
! *to = '\0';
!
! return path;
! #else /* VMS */
! errno = 0;
! return path;
! #endif /* !VMS */
}
Index: cppinit.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cppinit.c,v
retrieving revision 1.156
diff -u -p -c -r1.156 cppinit.c
*** cppinit.c 2001/03/15 07:54:54 1.156
--- cppinit.c 2001/04/01 13:59:10
*************** append_include_chain (pfile, dir, path,
*** 211,220 ****
struct stat st;
unsigned int len;
_cpp_simplify_pathname (dir);
! if (stat (dir, &st))
{
! /* Dirs that don't exist are silently ignored. */
if (errno != ENOENT)
cpp_notice_from_errno (pfile, dir);
else if (CPP_OPTION (pfile, verbose))
--- 211,225 ----
struct stat st;
unsigned int len;
+ if (*dir == '\0')
+ dir = xstrdup (".");
_cpp_simplify_pathname (dir);
! if (errno == 0)
! stat (dir, &st);
!
! if (errno)
{
! /* Dirs that don't exist are silently ignored. */
if (errno != ENOENT)
cpp_notice_from_errno (pfile, dir);
else if (CPP_OPTION (pfile, verbose))
Index: cpplib.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/cpplib.c,v
retrieving revision 1.245
diff -u -p -c -r1.245 cpplib.c
*** cpplib.c 2001/03/27 15:31:45 1.245
--- cpplib.c 2001/04/01 13:59:12
*************** do_line (pfile)
*** 729,740 ****
cpp_get_token (pfile, &token);
if (token.type == CPP_STRING)
{
! char *fname;
! unsigned int len = token.val.str.len + 1;
!
! fname = (char *) _cpp_pool_alloc (&pfile->ident_pool, len);
! memcpy (fname, token.val.str.text, len);
! _cpp_simplify_pathname (fname);
/* Only accept flags for the # 55 form. */
if (! pfile->state.line_extension)
--- 729,735 ----
cpp_get_token (pfile, &token);
if (token.type == CPP_STRING)
{
! const char *fname = (const char *) token.val.str.text;
/* Only accept flags for the # 55 form. */
if (! pfile->state.line_extension)