canonicalizing paths

DJ Delorie dj@redhat.com
Wed Jul 11 23:31:00 GMT 2001


I got tired of seeing gcc using paths like
"/foo/lib/gcc-lib/i686-pc-linux-gnu/3.1/../../../../i686-pc-linux-gnu/include"
(especially when "/foo/../" appears and "foo" doesn't exist, even though
the canonical path does) so I wrote a routine to canonicalize the
paths by simplifying "." and ".." segments.  When I went to add it to
gcc I found _cpp_simplify_path, which is supposed to do the same thing
but apparently doesn't.  When I replaced _cpp_simplify_path with my
canonicalize_path, gcc thought all my .c files didn't exist -
apparently, _cpp_simplify_path also checks for the existence of the
file.  Also, xgcc could use a canonicalization when it merges argv[0]
with various paths to find its subprograms and such.

So, I propose adding a generic canonicalize_path to libiberty, and
calling that both in gcc's make_relative_prefix() and in cppfile's
_cpp_simplify_path (leaving the rest of the logic there for now -
minimal change).  The only other mention of canonicalize_path is a
static function in libjava/ltdl/ltdl.c (which should probably be named
"canonicalize_slashes" since that's all it does).  Comments?

Here's the canonicalize_path function (libiberty/canon_path.c):

/* Canonicalize a path.
   Copyright (C) 2001 Free Software Foundation, Inc.

This file is part of the libiberty library.
Libiberty is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.

Libiberty is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with libiberty; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/*
NAME
	canonicalize_path -- canonicalize a generated path

SYNOPSIS
	char *canonicalize_path (char *src, char *dest)

DESCRIPTION

	Given a pointer to a path, canonicalize it by removing
	redundant "." and ".." entries.  The result is stored in dest
	(a pointer to the result is returned for convenience).  Dest
	may be NULL or the same as src, in which case the path is
	converted in place.

*/

#include <string.h>

#include "ansidecl.h"
#include "libiberty.h"
#include "safe-ctype.h"

#if defined(TEST) && !defined (__DJGPP__)
/* This so we can test the drive letter stuff: the self-test build
   pretends to be DJGPP, which turns on HAVE_DOS_BASED_FILE_SYSTEM
   below.  */
#define __DJGPP__
#endif

/* Primary directory separator, unless the build already chose one.  */
#ifndef DIR_SEPARATOR
#  define DIR_SEPARATOR '/'
#endif

/* DOS-like hosts get drive-letter handling and a backslash as a second
   separator (unless the build already chose a second separator).  */
#if defined (_WIN32) || defined (__MSDOS__) \
    || defined (__DJGPP__) || defined (__OS2__)
#  define HAVE_DOS_BASED_FILE_SYSTEM
#  ifndef DIR_SEPARATOR_2
#    define DIR_SEPARATOR_2 '\\'
#  endif
#endif

/* Nonzero if CH is a directory separator.  CH may be evaluated more
   than once, so it must not have side effects.  */
#ifndef DIR_SEPARATOR_2
#define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
#else
#define IS_DIR_SEPARATOR(ch) \
        (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
#endif

/* Here are the rules:

   Leading [drive]: (DOS-like hosts only) and/or '/' are preserved.
   Exactly two leading slashes are kept as "//"; three or more collapse
   to a single slash.
   Runs of slashes elsewhere collapse to the first slash of the run.
   /./ anywhere becomes '/' (or is removed).
   /foo/../ anywhere becomes '/' (or is removed), unless "foo" is itself
   a ".." that could not be removed.
   A ".." with nothing before it to remove is kept.
   Trailing slashes are removed.
   An empty result becomes ".".

   S is the NUL-terminated path to canonicalize.  D is the destination
   buffer; it may be the same as S, or 0 to convert S in place.  The
   return value is the start of the result (D, or S when D is 0).

   The result is never longer than the input, except that an empty
   input yields "." -- so the destination (or S itself, when converting
   in place) needs room for at least two bytes.
*/

char *
canonicalize_path (char *s, char *d)
{
#ifdef VMS
  /* Not implemented.  */
  if (d == 0)
    return s;
  if (s != d)
    strcpy (d, s);
  return d;
#else

  char *rv;
  char *droot;

  if (d == 0)
    d = s;
  rv = d;

#ifdef HAVE_DOS_BASED_FILE_SYSTEM
  /* Copy a leading drive specification verbatim.  */
  if (ISALPHA(s[0]) && s[1] == ':')
    {
      *d++ = *s++;
      *d++ = *s++;
    }
#endif

  if (IS_DIR_SEPARATOR (*s))
    {
      *d++ = *s++;
      if (IS_DIR_SEPARATOR (*s) && !IS_DIR_SEPARATOR (s[1]))
        {
          /* Special case for "//foo" meaning a Posix namespace
             escape.  */
          *d++ = *s++;
        }
      while (IS_DIR_SEPARATOR (*s))
        s++;
    }

  /* Nothing before DROOT (drive and root slashes) is ever removed.  */
  droot = d;

  while (*s)
    {
      /* At this point, we're always at the beginning of a path
         segment, and the output is either empty past DROOT or ends
         with the separator that follows the last kept segment.
         D never runs ahead of S, which keeps in-place use safe.  */
      int kept;

      if (s[0] == '.' && (s[1] == 0 || IS_DIR_SEPARATOR (s[1])))
        {
          /* "." contributes nothing.  */
          s++;
          kept = 0;
        }

      else if (s[0] == '.' && s[1] == '.'
               && (s[2] == 0 || IS_DIR_SEPARATOR (s[2])))
        {
          /* Locate the start of the last kept segment.  Only step
             back when there is output to step over, so we never form
             a pointer before the destination buffer.  */
          char *seg = d;

          if (seg > droot)
            seg--;              /* the separator ending that segment */
          while (seg > droot && ! IS_DIR_SEPARATOR (seg[-1]))
            seg--;

          if (d > droot
              && ! (d - seg == 3 && seg[0] == '.' && seg[1] == '.'))
            {
              /* "foo/.." cancels out.  */
              d = seg;
              s += 2;
              kept = 0;
            }
          else
            {
              /* Nothing to cancel, or the previous segment is a ".."
                 that itself could not be cancelled: keep this one.  */
              *d++ = *s++;
              *d++ = *s++;
              kept = 1;
            }
        }

      else
        {
          while (*s && ! IS_DIR_SEPARATOR (*s))
            *d++ = *s++;
          kept = 1;
        }

      if (IS_DIR_SEPARATOR (*s))
        {
          /* Emit one separator after a kept segment only, then skip
             the whole run so the next iteration starts on a segment.  */
          if (kept)
            *d++ = *s;
          while (IS_DIR_SEPARATOR (*s))
            s++;
        }
    }

  /* Drop the separator left behind by the last kept segment.  */
  while (droot < d && IS_DIR_SEPARATOR (d[-1]))
    --d;
  if (d == rv)
    *d++ = '.';
  *d = 0;

  return rv;
#endif /* VMS */
}

#ifdef TEST
#include <stdio.h>

/* Self-test table: each entry pairs an input path (before) with the
   result canonicalize_path is expected to produce for it (after).
   The table ends with a { 0, 0 } entry.  */
struct {
  const char *before;
  const char *after;
} test_data[] = {
  { "a", "a" },
  /* Drive-letter cases.  The guard must match the macro this file
     actually defines (HAVE_DOS_BASED_FILE_SYSTEM), otherwise these
     entries are silently left out.  */
#ifdef HAVE_DOS_BASED_FILE_SYSTEM
  { "a:", "a:" },
  { "a:/", "a:/" },
  { "a:/foo", "a:/foo" },
  { "a:/foo/..", "a:/" },
  { "a:./foo/..", "a:" },
#endif
  { "/", "/" },
  { "//", "//" },
  { "///", "/" },
  { "", "." },
  { ".", "." },
  { "..", ".." },
  { "/a", "/a" },
  { "//a", "//a" },
  { "///a", "/a" },
  { "a/", "a" },
  { "/a/", "/a" },
  { "./a", "a" },
  { "./a/..", "." },
  { "a/.", "a" },
  { "./a/.", "a" },
  { "../a", "../a" },
  { "a/..", "." },
  { "../a/..", ".." },
  { "abc/def", "abc/def" },
  { "abc//def", "abc/def" },
  { "abc///def", "abc/def" },
  { "./abc/def", "abc/def" },
  { "abc/./def", "abc/def" },
  { "abc/def/.", "abc/def" },
  { "././abc/././def/./.", "abc/def" },
  { "abc/../def", "def" },
  { "123/abc/../../def", "def" },
  { "123/abc/../..", "." },
  { "123/456/abc/../..", "123" },
  { "abc/../..", ".." },
  { "/abc/../..", "/.." },
  { "abc/../../", ".." },
  { 0, 0 },
};

/* Compare GOT with EXPECTED.  On a mismatch, print a report naming the
   original path BEFORE and the calling MODE, and return 1; return 0
   when they match.  */
static int
check_result (const char *mode, const char *before, const char *got,
              const char *expected)
{
  if (strcmp (got, expected) == 0)
    return 0;

  printf("Before:   %s (%s)\n", before, mode);
  printf("After:    %s\n", got);
  printf("Expected: %s\n\n", expected);
  return 1;
}

/* Run every entry of test_data through canonicalize_path in its three
   calling modes and return the number of mismatches (0 on success).  */
int
main (void)
{
  int i;
  int error_count = 0;

  for (i = 0; test_data[i].before; i++)
    {
      const char *before = test_data[i].before;
      const char *after = test_data[i].after;
      char tmp[1000];

      /* Separate source and destination buffers.  The source is not
         written to in this mode, so casting away const is safe.  */
      memset (tmp, 0, sizeof(tmp));
      canonicalize_path ((char *) before, tmp);
      error_count += check_result ("buffer", before, tmp, after);

      /* Destination explicitly the same as the source.  */
      strcpy (tmp, before);
      canonicalize_path (tmp, tmp);
      error_count += check_result ("in place", before, tmp, after);

      /* Null destination: converted in place, result taken from the
         return value.  */
      strcpy (tmp, before);
      error_count += check_result ("as function", before,
                                   canonicalize_path (tmp, 0), after);
    }
  return error_count;
}
#endif



More information about the Gcc-patches mailing list