This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

grab-bag of cpplib updates


Three patches all rolled into one:

- Separate reading the file from prescanning it, and use mmap() to do
  the read if possible.  This is currently a small performance loss
  but should become a win when the prescan pass goes away.

- Move struct hashnode to cpplib.h, rename it struct cpp_hashnode,
  give it a typedef, and add a 'union tree_node' slot for future use
  by front ends.  Make cpp_lookup() part of the public API.

- Define symbols in handle_option() based on the -lang option.  This
  goes with the specs patch I sent at the beginning of the week, which
  it would be nice if someone would review...

zw

	* cppfiles.c: Read files in, using mmap if possible, then
	prescan them separately.
	(read_file, read_with_read): New functions.
	* cpplex.c: Don't define UCHAR_MAX.
	(_cpp_read_and_prescan): Rename to _cpp_prescan.  Don't read
	the file here.

	* cppinit.c (handle_option): Automatically define __cplusplus,
	__OBJC__, __ASSEMBLER__, _LANGUAGE_FORTRAN here when we see
	the respective -lang switch.

	* cpphash.h (enum node_type, struct hashnode, _cpp_lookup
	prototype): Move to...
	* cpplib.h: ... here.  Rename struct hashnode to struct
	cpp_hashnode and give it a typedef.  Rename _cpp_lookup to
	cpp_lookup.  Add 'fe_value' slot, a union tree_node *.

===================================================================
Index: cppexp.c
--- cppexp.c	2000/05/27 23:19:55	1.58
+++ cppexp.c	2000/05/28 05:44:45
@@ -394,7 +394,7 @@ parse_assertion (pfile)
      cpp_reader *pfile;
 {
   struct operation op;
-  HASHNODE *hp;
+  cpp_hashnode *hp;
   struct predicate *pred;
   cpp_toklist query;
   enum cpp_ttype type;
@@ -414,7 +414,7 @@ parse_assertion (pfile)
 
   tok = pfile->token_buffer + old_written;
   len = CPP_WRITTEN (pfile) - old_written;
-  hp = _cpp_lookup (pfile, tok, len);
+  hp = cpp_lookup (pfile, tok, len);
 
   /* Look ahead for an open paren.  */
   _cpp_skip_hspace (pfile);
===================================================================
Index: cppfiles.c
--- cppfiles.c	2000/05/04 04:38:00	1.61
+++ cppfiles.c	2000/05/28 05:44:45
@@ -28,6 +28,17 @@ Foundation, 59 Temple Place - Suite 330,
 #include "intl.h"
 #include "mkdeps.h"
 
+#ifdef HAVE_MMAP_FILE
+# include <sys/mman.h>
+# ifndef MMAP_THRESHOLD
+#  define MMAP_THRESHOLD 3 /* Minimum page count to mmap the file.  */
+# endif
+
+#else  /* No MMAP_FILE */
+#  undef MMAP_THRESHOLD
+#  define MMAP_THRESHOLD 0
+#endif
+
 static IHASH *redundant_include_p PARAMS ((cpp_reader *, IHASH *,
 					   struct file_name_list *));
 static IHASH *make_IHASH	PARAMS ((const char *, const char *,
@@ -45,8 +56,10 @@ static int eq_IHASH		PARAMS ((const void
 static int find_include_file	PARAMS ((cpp_reader *, const char *,
 					struct file_name_list *,
 					IHASH **, int *));
-static int read_include_file	PARAMS ((cpp_reader *, int, IHASH *));
 static inline int open_include_file PARAMS ((cpp_reader *, const char *));
+static int read_include_file	PARAMS ((cpp_reader *, int, IHASH *));
+static ssize_t read_with_read	PARAMS ((cpp_buffer *, int, ssize_t));
+static ssize_t read_file	PARAMS ((cpp_buffer *, int, ssize_t));
 
 #if 0
 static void hack_vms_include_specification PARAMS ((char *));
@@ -678,8 +691,7 @@ read_include_file (pfile, fd, ihash)
      IHASH *ihash;
 {
   struct stat st;
-  size_t st_size;
-  long length;
+  ssize_t length;
   cpp_buffer *fp;
 
   fp = cpp_push_buffer (pfile, NULL, 0);
@@ -690,35 +702,37 @@ read_include_file (pfile, fd, ihash)
   if (fstat (fd, &st) < 0)
     goto perror_fail;
 
-  /* If fd points to a plain file, we know how big it is, so we can
-     allocate the buffer all at once.  If fd is a pipe or terminal, we
-     can't.  Most C source files are 4k or less, so we guess that.  If
-     fd is something weird, like a directory, we don't want to read it
-     at all.
+  /* If fd points to a plain file, we might be able to mmap it; we can
+     definitely allocate the buffer all at once.  If fd is a pipe or
+     terminal, we can't do either.  If fd is something weird, like a
+     block device or a directory, we don't want to read it at all.
 
      Unfortunately, different systems use different st.st_mode values
      for pipes: some have S_ISFIFO, some S_ISSOCK, some are buggy and
      zero the entire struct stat except a couple fields.  Hence we don't
      even try to figure out what something is, except for plain files,
-     directories, and block devices.
-
-     In all cases, read_and_prescan will resize the buffer if it
-     turns out there's more data than we thought.  */
+     directories, and block devices.  */
 
   if (S_ISREG (st.st_mode))
     {
-      /* off_t might have a wider range than size_t - in other words,
+      ssize_t st_size;
+
+      /* off_t might have a wider range than ssize_t - in other words,
 	 the max size of a file might be bigger than the address
 	 space.  We can't handle a file that large.  (Anyone with
-         a single source file bigger than 4GB needs to rethink
+	 a single source file bigger than 2GB needs to rethink
 	 their coding style.)  */
-      st_size = (size_t) st.st_size;
-      if ((unsigned HOST_WIDEST_INT) st_size
-	  != (unsigned HOST_WIDEST_INT) st.st_size)
+      if (st.st_size > SSIZE_MAX)
 	{
-	  cpp_error (pfile, "file `%s' is too large", ihash->name);
+	  cpp_error (pfile, "%s is too large", ihash->name);
 	  goto fail;
 	}
+      st_size = st.st_size;
+      length = read_file (fp, fd, st_size);
+      if (length == -1)
+	goto perror_fail;
+      if (length < st_size)
+	cpp_warning (pfile, "%s is shorter than expected\n", ihash->name);
     }
   else if (S_ISBLK (st.st_mode))
     {
@@ -732,25 +746,28 @@ read_include_file (pfile, fd, ihash)
     }
   else
     {
-      /* We don't know how big this is.  4k is a decent first guess.  */
-      st_size = 4096;
+      /* 8 kilobytes is a sensible starting size.  It ought to be
+	 bigger than the kernel pipe buffer, and it's definitely
+	 bigger than the majority of C source files.  */
+      length = read_with_read (fp, fd, 8 * 1024);
+      if (length == -1)
+	goto perror_fail;
     }
 
-  /* Read the file, converting end-of-line characters and trigraphs
-     (if enabled). */
+  /* These must be set before prescan.  */
   fp->ihash = ihash;
   fp->nominal_fname = ihash->name;
-  length = _cpp_read_and_prescan (pfile, fp, fd, st_size);
-  if (length < 0)
-    goto fail;
+  
   if (length == 0)
     ihash->control_macro = U"";  /* never re-include */
+  else
+    /* Temporary - I hope.  */
+    length = _cpp_prescan (pfile, fp, length);
 
-  close (fd);
   fp->rlimit = fp->buf + length;
   fp->cur = fp->buf;
   if (ihash->foundhere != ABSOLUTE_PATH)
-      fp->system_header_p = ihash->foundhere->sysp;
+    fp->system_header_p = ihash->foundhere->sysp;
   fp->lineno = 1;
   fp->line_base = fp->buf;
 
@@ -761,6 +778,7 @@ read_include_file (pfile, fd, ihash)
 
   pfile->input_stack_listing_current = 0;
   pfile->only_seen_white = 2;
+  close (fd);
   return 1;
 
  perror_fail:
@@ -770,6 +788,74 @@ read_include_file (pfile, fd, ihash)
  push_fail:
   close (fd);
   return 0;
+}
+
+static ssize_t
+read_file (fp, fd, size)
+     cpp_buffer *fp;
+     int fd;
+     ssize_t size;
+{
+  static int pagesize = -1;
+
+  if (size == 0)
+    return 0;
+
+  if (pagesize == -1)
+    pagesize = getpagesize ();
+
+#if MMAP_THRESHOLD
+  if (size / pagesize >= MMAP_THRESHOLD)
+    {
+      const U_CHAR *result
+	= (const U_CHAR *) mmap (0, size, PROT_READ, MAP_PRIVATE, fd, 0);
+      if (result != (const U_CHAR *)-1)
+	{
+	  fp->buf = result;
+	  fp->mapped = 1;
+	  return size;
+	}
+    }
+  /* If mmap fails, try read.  If there's really a problem, read will
+     fail too.  */
+#endif
+
+  return read_with_read (fp, fd, size);
+}
+
+static ssize_t
+read_with_read (fp, fd, size)
+     cpp_buffer *fp;
+     int fd;
+     ssize_t size;
+{
+  ssize_t offset, count;
+  U_CHAR *buf;
+
+  buf = (U_CHAR *) xmalloc (size);
+  offset = 0;
+  while ((count = read (fd, buf + offset, size - offset)) > 0)
+    {
+      offset += count;
+      if (offset == size)
+	buf = xrealloc (buf, (size *= 2));
+    }
+  if (count < 0)
+    {
+      free (buf);
+      return -1;
+    }
+  if (offset == 0)
+    {
+      free (buf);
+      return 0;
+    }
+
+  if (offset < size)
+    buf = xrealloc (buf, offset);
+  fp->buf = buf;
+  fp->mapped = 0;
+  return offset;
 }
 
 /* Given a path FNAME, extract the directory component and place it
===================================================================
Index: cpphash.c
--- cpphash.c	2000/05/19 17:43:38	1.91
+++ cpphash.c	2000/05/28 05:44:46
@@ -110,7 +110,7 @@ struct hashdummy
 static unsigned int hash_HASHNODE PARAMS ((const void *));
 static int eq_HASHNODE		  PARAMS ((const void *, const void *));
 static void del_HASHNODE	  PARAMS ((void *));
-static HASHNODE *make_HASHNODE	  PARAMS ((const U_CHAR *, size_t,
+static cpp_hashnode *make_HASHNODE	  PARAMS ((const U_CHAR *, size_t,
 					   enum node_type, unsigned int));
 
 static void dump_funlike_macro	  PARAMS ((cpp_reader *,
@@ -118,10 +118,10 @@ static void dump_funlike_macro	  PARAMS 
 static int dump_hash_helper	  PARAMS ((void **, void *));
 
 static void push_macro_expansion PARAMS ((cpp_reader *, const U_CHAR *,
-					  int, HASHNODE *));
+					  int, cpp_hashnode *));
 static int unsafe_chars		 PARAMS ((cpp_reader *, int, int));
 static enum cpp_ttype macarg	 PARAMS ((cpp_reader *, int));
-static void special_symbol	 PARAMS ((cpp_reader *, HASHNODE *));
+static void special_symbol	 PARAMS ((cpp_reader *, cpp_hashnode *));
 static int compare_defs		 PARAMS ((cpp_reader *,
 					  const struct funct_defn *,
 					  const struct funct_defn *));
@@ -195,7 +195,7 @@ static void scan_arguments	PARAMS ((cpp_
 					 const struct funct_defn *,
 					 struct argdata *, const U_CHAR *));
 static void stringify		PARAMS ((cpp_reader *, struct argdata *));
-static void funlike_macroexpand	PARAMS ((cpp_reader *, HASHNODE *,
+static void funlike_macroexpand	PARAMS ((cpp_reader *, cpp_hashnode *,
 					 struct argdata *));
 
 /* Calculate hash of a string of length LEN.  */
@@ -213,16 +213,16 @@ _cpp_calc_hash (str, len)
   return r + len;
 }
 
-/* Calculate hash of a HASHNODE structure.  */
+/* Calculate hash of a cpp_hashnode structure.  */
 static unsigned int
 hash_HASHNODE (x)
      const void *x;
 {
-  const HASHNODE *h = (const HASHNODE *)x;
+  const cpp_hashnode *h = (const cpp_hashnode *)x;
   return h->hash;
 }
 
-/* Compare a HASHNODE structure (already in the table) with a
+/* Compare a cpp_hashnode structure (already in the table) with a
    hashdummy structure (not yet in the table).  This relies on the
    rule that the existing entry is the first argument, the potential
    entry the second.  It also relies on the comparison function never
@@ -233,36 +233,36 @@ eq_HASHNODE (x, y)
      const void *x;
      const void *y;
 {
-  const HASHNODE *a = (const HASHNODE *)x;
+  const cpp_hashnode *a = (const cpp_hashnode *)x;
   const struct hashdummy *b = (const struct hashdummy *)y;
 
   return (a->length == b->length
 	  && !ustrncmp (a->name, b->name, a->length));
 }
 
-/* Destroy a HASHNODE.  */
+/* Destroy a cpp_hashnode.  */
 static void
 del_HASHNODE (x)
      void *x;
 {
-  HASHNODE *h = (HASHNODE *)x;
+  cpp_hashnode *h = (cpp_hashnode *)x;
 
   _cpp_free_definition (h);
   free (h);
 }
 
-/* Allocate and initialize a HASHNODE structure.
+/* Allocate and initialize a cpp_hashnode structure.
    Caller must fill in the value field.  */
 
-static HASHNODE *
+static cpp_hashnode *
 make_HASHNODE (name, len, type, hash)
      const U_CHAR *name;
      size_t len;
      enum node_type type;
      unsigned int hash;
 {
-  HASHNODE *hp = (HASHNODE *) xmalloc (sizeof (HASHNODE) + len);
-  U_CHAR *p = (U_CHAR *)hp + offsetof (HASHNODE, name);
+  cpp_hashnode *hp = (cpp_hashnode *) xmalloc (sizeof (cpp_hashnode) + len);
+  U_CHAR *p = (U_CHAR *)hp + offsetof (cpp_hashnode, name);
 
   hp->type = type;
   hp->length = len;
@@ -277,21 +277,21 @@ make_HASHNODE (name, len, type, hash)
 
 /* Find the hash node for name "name", of length LEN.  */
 
-HASHNODE *
-_cpp_lookup (pfile, name, len)
+cpp_hashnode *
+cpp_lookup (pfile, name, len)
      cpp_reader *pfile;
      const U_CHAR *name;
      int len;
 {
   struct hashdummy dummy;
-  HASHNODE *new, **slot;
+  cpp_hashnode *new, **slot;
   unsigned int hash;
 
   dummy.name = name;
   dummy.length = len;
   hash = _cpp_calc_hash (name, len);
 
-  slot = (HASHNODE **)
+  slot = (cpp_hashnode **)
     htab_find_slot_with_hash (pfile->hashtab, (void *)&dummy, hash, INSERT);
   if (*slot)
     return *slot;
@@ -315,7 +315,7 @@ _cpp_init_macro_hash (pfile)
 
 void
 _cpp_free_definition (h)
-     HASHNODE *h;
+     cpp_hashnode *h;
 {
   if (h->type == T_XCONST)
     free ((PTR) h->value.cpval);
@@ -860,7 +860,7 @@ int
 _cpp_create_definition (pfile, list, hp)
      cpp_reader *pfile;
      cpp_toklist *list;
-     HASHNODE *hp;
+     cpp_hashnode *hp;
 {
   struct funct_defn *fdefn = 0;
   struct object_defn *odefn = 0;
@@ -1102,7 +1102,7 @@ _cpp_quote_string (pfile, src)
 static void
 special_symbol (pfile, hp)
      cpp_reader *pfile;
-     HASHNODE *hp;
+     cpp_hashnode *hp;
 {
   const U_CHAR *buf;
   cpp_buffer *ip;
@@ -1187,12 +1187,12 @@ special_symbol (pfile, hp)
       {
 	time_t tt = time (NULL);
 	struct tm *tb = localtime (&tt);
-	HASHNODE *d, *t;
+	cpp_hashnode *d, *t;
 
 	if (hp->type == T_DATE)
-	  d = hp, t = _cpp_lookup (pfile, DSC("__TIME__"));
+	  d = hp, t = cpp_lookup (pfile, DSC("__TIME__"));
 	else
-	  t = hp, d = _cpp_lookup (pfile, DSC("__DATE__"));
+	  t = hp, d = cpp_lookup (pfile, DSC("__DATE__"));
 
 	d->value.cpval = xmalloc (sizeof "'Oct 11 1347'");
 	sprintf ((char *)d->value.cpval, "\"%s %2d %4d\"",
@@ -1229,7 +1229,7 @@ special_symbol (pfile, hp)
 void
 _cpp_macroexpand (pfile, hp)
      cpp_reader *pfile;
-     HASHNODE *hp;
+     cpp_hashnode *hp;
 {
   const struct funct_defn *defn;
   struct argdata *args;
@@ -1481,7 +1481,7 @@ stringify (pfile, arg)
 static void
 funlike_macroexpand (pfile, hp, args)
      cpp_reader *pfile;
-     HASHNODE *hp;
+     cpp_hashnode *hp;
      struct argdata *args;
 {
   const struct funct_defn *defn = hp->value.fdefn;
@@ -1731,7 +1731,7 @@ push_macro_expansion (pfile, xbuf, len, 
      cpp_reader *pfile;
      const U_CHAR *xbuf;
      int len;
-     HASHNODE *hp;
+     cpp_hashnode *hp;
 {
   cpp_buffer *mbuf;
   int advance_cur = 0;
@@ -1840,7 +1840,7 @@ compare_defs (pfile, d1, d2)
 void
 _cpp_dump_definition (pfile, hp)
      cpp_reader *pfile;
-     HASHNODE *hp;
+     cpp_hashnode *hp;
 {
   CPP_RESERVE (pfile, hp->length + sizeof "#define ");
   CPP_PUTS_Q (pfile, "#define ", sizeof "#define " - 1);
@@ -1945,7 +1945,7 @@ dump_hash_helper (h, p)
      void **h;
      void *p;
 {
-  HASHNODE *hp = (HASHNODE *)*h;
+  cpp_hashnode *hp = (cpp_hashnode *)*h;
   cpp_reader *pfile = (cpp_reader *)p;
 
   if (hp->type == T_MACRO || hp->type == T_FMACRO
===================================================================
Index: cpphash.h
--- cpphash.h	2000/05/27 23:19:56	1.51
+++ cpphash.h	2000/05/28 05:44:46
@@ -25,52 +25,6 @@ Foundation, 59 Temple Place - Suite 330,
 typedef unsigned char U_CHAR;
 #define U (const U_CHAR *)  /* Intended use: U"string" */
 
-/* The structure of a node in the hash table.  The hash table
-   has entries for all tokens defined by #define commands (type T_MACRO),
-   plus some special tokens like __LINE__ (these each have their own
-   type, and the appropriate code is run when that type of node is seen.
-   It does not contain control words like "#define", which are recognized
-   by a separate piece of code. */
-
-/* different flavors of hash nodes */
-enum node_type
-{
-  T_VOID = 0,	   /* no definition yet */
-  T_SPECLINE,	   /* `__LINE__' */
-  T_DATE,	   /* `__DATE__' */
-  T_FILE,	   /* `__FILE__' */
-  T_BASE_FILE,	   /* `__BASE_FILE__' */
-  T_INCLUDE_LEVEL, /* `__INCLUDE_LEVEL__' */
-  T_TIME,	   /* `__TIME__' */
-  T_STDC,	   /* `__STDC__' */
-  T_CONST,	   /* Constant string, used by `__SIZE_TYPE__' etc */
-  T_XCONST,	   /* Ditto, but the string is malloced memory */
-  T_POISON,	   /* poisoned identifier */
-  T_MACRO,	   /* object-like macro */
-  T_FMACRO,	   /* function-like macro */
-  T_IDENTITY,	   /* macro defined to itself */
-  T_EMPTY,	   /* macro defined to nothing */
-  T_ASSERTION	   /* predicate for #assert */
-};
-
-typedef struct hashnode HASHNODE;
-struct hashnode
-{
-  unsigned int hash;			/* cached hash value */
-  unsigned short length;		/* length of name */
-  ENUM_BITFIELD(node_type) type : 8;	/* node type */
-  char disabled;			/* macro turned off for rescan? */
-
-  union {
-    const U_CHAR *cpval;		/* some predefined macros */
-    const struct object_defn *odefn;	/* #define foo bar */
-    const struct funct_defn *fdefn;	/* #define foo(x) bar(x) */
-    struct predicate *pred;		/* #assert */
-  } value;
-
-  const U_CHAR name[1];			/* name[length] */
-};
-
 /* Structure used for assertion predicates.  */
 struct predicate
 {
@@ -109,14 +63,13 @@ struct ihash
   struct ihash *next_this_file;
 
   /* Location of the file in the include search path.
-     Used for include_next */
+     Used for include_next and to detect redundant includes. */
   struct file_name_list *foundhere;
 
   unsigned int hash;		/* save hash value for future reference */
   const char *nshort;		/* name of file as referenced in #include;
 				   points into name[]  */
-  const U_CHAR *control_macro;	/* macro, if any, preventing reinclusion -
-				   see redundant_include_p */
+  const U_CHAR *control_macro;	/* macro, if any, preventing reinclusion.  */
   const char name[1];		/* (partial) pathname of file */
 };
 typedef struct ihash IHASH;
@@ -224,14 +177,12 @@ extern unsigned char _cpp_IStable[256];
 
 /* In cpphash.c */
 extern unsigned int _cpp_calc_hash	PARAMS ((const U_CHAR *, size_t));
-extern HASHNODE *_cpp_lookup		PARAMS ((cpp_reader *,
-						 const U_CHAR *, int));
-extern void _cpp_free_definition	PARAMS ((HASHNODE *));
-extern int _cpp_create_definition	PARAMS ((cpp_reader *,
-						 cpp_toklist *, HASHNODE *));
-extern void _cpp_dump_definition	PARAMS ((cpp_reader *, HASHNODE *));
+extern void _cpp_free_definition	PARAMS ((cpp_hashnode *));
+extern int _cpp_create_definition	PARAMS ((cpp_reader *, cpp_toklist *,
+						 cpp_hashnode *));
+extern void _cpp_dump_definition	PARAMS ((cpp_reader *, cpp_hashnode *));
 extern void _cpp_quote_string		PARAMS ((cpp_reader *, const U_CHAR *));
-extern void _cpp_macroexpand		PARAMS ((cpp_reader *, HASHNODE *));
+extern void _cpp_macroexpand		PARAMS ((cpp_reader *, cpp_hashnode *));
 extern void _cpp_init_macro_hash	PARAMS ((cpp_reader *));
 extern void _cpp_dump_macro_hash	PARAMS ((cpp_reader *));
 
@@ -254,8 +205,8 @@ extern void _cpp_expand_to_buffer	PARAMS
 						 const unsigned char *, int));
 extern int _cpp_parse_assertion		PARAMS ((cpp_reader *));
 extern enum cpp_ttype _cpp_lex_token	PARAMS ((cpp_reader *));
-extern long _cpp_read_and_prescan	PARAMS ((cpp_reader *, cpp_buffer *,
-						 int, size_t));
+extern ssize_t _cpp_prescan		PARAMS ((cpp_reader *, cpp_buffer *,
+						 ssize_t));
 extern void _cpp_init_input_buffer	PARAMS ((cpp_reader *));
 extern void _cpp_grow_token_buffer	PARAMS ((cpp_reader *, long));
 extern enum cpp_ttype _cpp_get_directive_token
===================================================================
Index: cppinit.c
--- cppinit.c	2000/05/18 11:09:26	1.81
+++ cppinit.c	2000/05/28 05:44:46
@@ -670,7 +670,7 @@ initialize_builtins (pfile)
 {
   const struct builtin *b;
   const U_CHAR *val;
-  HASHNODE *hp;
+  cpp_hashnode *hp;
   for(b = builtin_array; b < builtin_array_end; b++)
     {
       if (b->type == T_STDC && CPP_TRADITIONAL (pfile))
@@ -686,7 +686,7 @@ initialize_builtins (pfile)
       else
 	val = b->value;
 
-      hp = _cpp_lookup (pfile, b->name, b->len);
+      hp = cpp_lookup (pfile, b->name, b->len);
       hp->value.cpval = val;
       hp->type = b->type;
 
@@ -1252,6 +1252,7 @@ handle_option (pfile, argc, argv)
      char **argv;
 {
   int i = 0;
+  struct cpp_pending *pend = CPP_OPTION (pfile, pending);
 
   if (argv[i][0] != '-')
     {
@@ -1354,7 +1355,7 @@ handle_option (pfile, argc, argv)
 	  CPP_OPTION (pfile, print_include_names) = 1;
 	  break;
 	case OPT_D:
-	  new_pending_directive (CPP_OPTION (pfile, pending), arg, cpp_define);
+	  new_pending_directive (pend, arg, cpp_define);
 	  break;
 	case OPT_pedantic_errors:
 	  CPP_OPTION (pfile, pedantic_errors) = 1;
@@ -1396,8 +1397,7 @@ handle_option (pfile, argc, argv)
 	  CPP_OPTION (pfile, c99) = 0;
 	  CPP_OPTION (pfile, objc) = 0;
 	  CPP_OPTION (pfile, trigraphs) = 1;
-	  new_pending_directive (CPP_OPTION (pfile, pending),
-				 "__STRICT_ANSI__", cpp_define);
+	  new_pending_directive (pend, "__STRICT_ANSI__", cpp_define);
 	  break;
 	case OPT_lang_cplusplus:
 	  CPP_OPTION (pfile, cplusplus) = 1;
@@ -1405,21 +1405,29 @@ handle_option (pfile, argc, argv)
 	  CPP_OPTION (pfile, c89) = 0;
 	  CPP_OPTION (pfile, c99) = 0;
 	  CPP_OPTION (pfile, objc) = 0;
+	  new_pending_directive (pend, "__cplusplus", cpp_define);
 	  break;
-	case OPT_lang_objc:
 	case OPT_lang_objcplusplus:
-	  CPP_OPTION (pfile, cplusplus) = opt_code == OPT_lang_objcplusplus;
+	  CPP_OPTION (pfile, cplusplus) = 1;
+	  new_pending_directive (pend, "__cplusplus", cpp_define);
+	  /* fall through */
+	case OPT_lang_objc:
 	  CPP_OPTION (pfile, cplusplus_comments) = 1;
 	  CPP_OPTION (pfile, c89) = 0;
 	  CPP_OPTION (pfile, c99) = 0;
 	  CPP_OPTION (pfile, objc) = 1;
+	  new_pending_directive (pend, "__OBJC__", cpp_define);
 	  break;
 	case OPT_lang_asm:
  	  CPP_OPTION (pfile, lang_asm) = 1;
+	  CPP_OPTION (pfile, dollars_in_ident) = 0;
+	  new_pending_directive (pend, "__ASSEMBLER__", cpp_define);
 	  break;
 	case OPT_lang_fortran:
  	  CPP_OPTION (pfile, lang_fortran) = 1;
+	  CPP_OPTION (pfile, traditional) = 1;
 	  CPP_OPTION (pfile, cplusplus_comments) = 0;
+	  new_pending_directive (pend, "_LANGUAGE_FORTRAN", cpp_define);
 	  break;
 	case OPT_lang_chill:
 	  CPP_OPTION (pfile, objc) = 0;
===================================================================
Index: cpplex.c
--- cpplex.c	2000/05/28 01:03:16	1.50
+++ cpplex.c	2000/05/28 05:44:47
@@ -26,6 +26,10 @@ Foundation, 59 Temple Place - Suite 330,
 #include "cpplib.h"
 #include "cpphash.h"
 
+#ifdef HAVE_MMAP_FILE
+# include <sys/mman.h>
+#endif
+
 #define PEEKBUF(BUFFER, N) \
   ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
 #define GETBUF(BUFFER) \
@@ -224,7 +228,7 @@ cpp_pop_buffer (pfile)
     }
   else if (buf->macro)
     {
-      HASHNODE *m = buf->macro;
+      cpp_hashnode *m = buf->macro;
   
       m->disabled = 0;
       if ((m->type == T_FMACRO && buf->mapped)
@@ -1622,9 +1626,9 @@ maybe_macroexpand (pfile, written)
 {
   U_CHAR *macro = pfile->token_buffer + written;
   size_t len = CPP_WRITTEN (pfile) - written;
-  HASHNODE *hp = _cpp_lookup (pfile, macro, len);
+  cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
 
-  /* _cpp_lookup never returns null.  */
+  /* cpp_lookup never returns null.  */
   if (hp->type == T_VOID)
     return 0;
   if (hp->disabled || hp->type == T_IDENTITY)
@@ -1892,14 +1896,10 @@ find_position (start, limit, linep)
   return lbase;
 }
 
-/* The following table is used by _cpp_read_and_prescan.  If we have
+/* The following table is used by _cpp_prescan.  If we have
    designated initializers, it can be constant data; otherwise, it is
    set up at runtime by _cpp_init_input_buffer.  */
 
-#ifndef UCHAR_MAX
-#define UCHAR_MAX 255	/* assume 8-bit bytes */
-#endif
-
 #if (GCC_VERSION >= 2007)
 #define init_chartab()  /* nothing */
 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
@@ -1936,10 +1936,11 @@ END
 
 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
+
+/* Prescan pass over a file already loaded into BUF.  This is
+   translation phases 1 and 2 (C99 5.1.1.2).
 
-/* Read the entire contents of file DESC into buffer BUF.  LEN is how
-   much memory to allocate initially; more will be allocated if
-   necessary.  Convert end-of-line markers (\n, \r, \r\n, \n\r) to
+   Convert end-of-line markers (\n, \r, \r\n, \n\r) to
    canonical form (\n).  If enabled, convert and/or warn about
    trigraphs.  Convert backslash-newline to a one-character escape
    (\r) and remove it from "embarrassing" places (i.e. the middle of a
@@ -1960,204 +1961,153 @@ END
    at the end of reload1.c is about 60%.  (reload1.c is 329k.)
 
    If your file has more than one kind of end-of-line marker, you
-   will get messed-up line numbering.
-   
-   So that the cases of the switch statement do not have to concern
-   themselves with the complications of reading beyond the end of the
-   buffer, the buffer is guaranteed to have at least 3 characters in
-   it (or however many are left in the file, if less) on entry to the
-   switch.  This is enough to handle trigraphs and the "\\\n\r" and
-   "\\\r\n" cases.
-   
-   The end of the buffer is marked by a '\\', which, being a special
-   character, guarantees we will exit the fast-scan loops and perform
-   a refill. */
- 
-long
-_cpp_read_and_prescan (pfile, fp, desc, len)
+   will get messed-up line numbering.  */
+
+ssize_t
+_cpp_prescan (pfile, fp, len)
      cpp_reader *pfile;
      cpp_buffer *fp;
-     int desc;
-     size_t len;
+     ssize_t len;
 {
-  U_CHAR *buf = (U_CHAR *) xmalloc (len);
-  U_CHAR *ip, *op, *line_base;
-  U_CHAR *ibase;
+  U_CHAR *buf, *op;
+  const U_CHAR *ibase, *ip, *ilimit;
+  U_CHAR *line_base;
   unsigned long line;
   unsigned int deferred_newlines;
-  size_t offset;
-  int count = 0;
 
-  offset = 0;
-  deferred_newlines = 0;
-  op = buf;
-  line_base = buf;
+  /* Allocate an extra byte in case we must add a trailing \n.  */
+  buf = (U_CHAR *) xmalloc (len + 1);
+  line_base = op = buf;
+  ip = ibase = fp->buf;
+  ilimit = ibase + len;
   line = 1;
-  ibase = pfile->input_buffer + 3;
-  ip = ibase;
-  ip[-1] = '\0';  /* Guarantee no match with \n for SPECCASE_CR */
+  deferred_newlines = 0;
 
   for (;;)
     {
-      U_CHAR *near_buff_end;
+      const U_CHAR *iq;
 
-      count = read (desc, ibase, pfile->input_buffer_len);
-      if (count < 0)
-	goto error;
-      
-      ibase[count] = '\\';  /* Marks end of buffer */
-      if (count)
+      /* Deal with \-newline, potentially in the middle of a token. */
+      if (deferred_newlines)
 	{
-	  near_buff_end = pfile->input_buffer + count;
-	  offset += count;
-	  if (offset > len)
+	  if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
 	    {
-	      size_t delta_op;
-	      size_t delta_line_base;
-	      len = offset * 2;
-	      if (offset > len)
-		/* len overflowed.
-		   This could happen if the file is larger than half the
-		   maximum address space of the machine. */
-		goto too_big;
-
-	      delta_op = op - buf;
-	      delta_line_base = line_base - buf;
-	      buf = (U_CHAR *) xrealloc (buf, len);
-	      op = buf + delta_op;
-	      line_base = buf + delta_line_base;
+	      /* Previous was not white space.  Skip to white
+		 space, if we can, before outputting the \r's */
+	      iq = ip;
+	      while (iq < ilimit
+		     && *iq != ' '
+		     && *iq != '\t'
+		     && *iq != '\n'
+		     && NORMAL(*iq))
+		iq++;
+	      memcpy (op, ip, iq - ip);
+	      op += iq - ip;
+	      ip += iq - ip;
+	      if (! NORMAL(*ip))
+		goto do_speccase;
 	    }
-	}
-      else
-	{
-	  if (ip == ibase)
-	    break;
-	  /* Allow normal processing of the (at most 2) remaining
-	     characters.  The end-of-buffer marker is still present
-	     and prevents false matches within the switch. */
-	  near_buff_end = ibase - 1;
+	  while (deferred_newlines)
+	    deferred_newlines--, *op++ = '\r';
 	}
 
-      for (;;)
-	{
-	  unsigned int span;
+      /* Copy as much as we can without special treatment. */
+      iq = ip;
+      while (iq < ilimit && NORMAL (*iq)) iq++;
+      memcpy (op, ip, iq - ip);
+      op += iq - ip;
+      ip += iq - ip;
 
-	  /* Deal with \-newline, potentially in the middle of a token. */
-	  if (deferred_newlines)
+    do_speccase:
+      if (ip >= ilimit)
+	break;
+
+      switch (chartab[*ip++])
+	{
+	case SPECCASE_CR:  /* \r */
+	  if (ip[-2] != '\n')
 	    {
-	      if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
-		{
-		  /* Previous was not white space.  Skip to white
-		     space, if we can, before outputting the \r's */
-		  span = 0;
-		  while (ip[span] != ' '
-			 && ip[span] != '\t'
-			 && ip[span] != '\n'
-			 && NORMAL(ip[span]))
-		    span++;
-		  memcpy (op, ip, span);
-		  op += span;
-		  ip += span;
-		  if (! NORMAL(ip[0]))
-		    goto do_speccase;
-		}
-	      while (deferred_newlines)
-		deferred_newlines--, *op++ = '\r';
+	      if (ip < ilimit && *ip == '\n')
+		ip++;
+	      *op++ = '\n';
 	    }
-
-	  /* Copy as much as we can without special treatment. */
-	  span = 0;
-	  while (NORMAL (ip[span])) span++;
-	  memcpy (op, ip, span);
-	  op += span;
-	  ip += span;
+	  break;
 
-	do_speccase:
-	  if (ip > near_buff_end) /* Do we have enough chars? */
-	    break;
-	  switch (chartab[*ip++])
+	case SPECCASE_BACKSLASH:  /* \ */
+	backslash:
+	  if (ip < ilimit)
 	    {
-	    case SPECCASE_CR:  /* \r */
-	      if (ip[-2] != '\n')
-		{
-		  if (*ip == '\n')
-		    ip++;
-		  *op++ = '\n';
-		}
-	      break;
-
-	    case SPECCASE_BACKSLASH:  /* \ */
 	      if (*ip == '\n')
 		{
 		  deferred_newlines++;
 		  ip++;
 		  if (*ip == '\r') ip++;
+		  break;
 		}
 	      else if (*ip == '\r')
 		{
 		  deferred_newlines++;
 		  ip++;
 		  if (*ip == '\n') ip++;
+		  break;
 		}
-	      else
-		*op++ = '\\';
-	      break;
+	    }
 
-	    case SPECCASE_QUESTION: /* ? */
-	      {
-		unsigned int d, t;
+	  *op++ = '\\';
+	  break;
 
-		*op++ = '?'; /* Normal non-trigraph case */
-		if (ip[0] != '?')
-		  break;
-		    
-		d = ip[1];
-		t = chartab[d];
-		if (NONTRI (t))
-		  break;
+	case SPECCASE_QUESTION: /* ? */
+	  {
+	    unsigned int d, t;
 
-		if (CPP_OPTION (pfile, warn_trigraphs))
-		  {
-		    unsigned long col;
-		    line_base = find_position (line_base, op, &line);
-		    col = op - line_base + 1;
-		    if (CPP_OPTION (pfile, trigraphs))
-		      cpp_warning_with_line (pfile, line, col,
-					     "trigraph ??%c converted to %c", d, t);
-		    else
-		      cpp_warning_with_line (pfile, line, col,
-					     "trigraph ??%c ignored", d);
-		  }
+	    *op++ = '?'; /* Normal non-trigraph case */
+	    if (ip > ilimit - 2 || ip[0] != '?')
+	      break;
+		    
+	    d = ip[1];
+	    t = chartab[d];
+	    if (NONTRI (t))
+	      break;
 
-		ip += 2;
+	    if (CPP_OPTION (pfile, warn_trigraphs))
+	      {
+		unsigned long col;
+		line_base = find_position (line_base, op, &line);
+		col = op - line_base + 1;
 		if (CPP_OPTION (pfile, trigraphs))
-		  {
-		    op[-1] = t;	    /* Overwrite '?' */
-		    if (t == '\\')
-		      {
-			op--;
-			*--ip = '\\';
-			goto do_speccase; /* May need buffer refill */
-		      }
-		  }
+		  cpp_warning_with_line (pfile, line, col,
+					 "trigraph ??%c converted to %c", d, t);
 		else
+		  cpp_warning_with_line (pfile, line, col,
+					 "trigraph ??%c ignored", d);
+	      }
+
+	    ip += 2;
+	    if (CPP_OPTION (pfile, trigraphs))
+	      {
+		op[-1] = t;	    /* Overwrite '?' */
+		if (t == '\\')
 		  {
-		    *op++ = '?';
-		    *op++ = d;
+		    op--;
+		    goto backslash;
 		  }
 	      }
-	      break;
-	    }
+	    else
+	      {
+		*op++ = '?';
+		*op++ = d;
+	      }
+	  }
+	  break;
 	}
-      /* Copy previous char plus unprocessed (at most 2) chars
-	 to beginning of buffer, refill it with another
-	 read(), and continue processing */
-      memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
-      ip -= count;
     }
 
-  if (offset == 0)
-    return 0;
+#ifdef HAVE_MMAP_FILE
+  if (fp->mapped)
+    munmap ((caddr_t) fp->buf, len);
+  else
+#endif
+    free ((PTR) fp->buf);
 
   if (op[-1] != '\n')
     {
@@ -2165,30 +2115,11 @@ _cpp_read_and_prescan (pfile, fp, desc, 
       line_base = find_position (line_base, op, &line);
       col = op - line_base + 1;
       cpp_warning_with_line (pfile, line, col, "no newline at end of file");
-      if (offset + 1 > len)
-	{
-	  len += 1;
-	  if (offset + 1 > len)
-	    goto too_big;
-	  buf = (U_CHAR *) xrealloc (buf, len);
-	  op = buf + offset;
-	}
       *op++ = '\n';
     }
 
-  fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
+  fp->buf = buf;
   return op - buf;
-
- too_big:
-  cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
-	      (unsigned long)offset);
-  free (buf);
-  return -1;
-
- error:
-  cpp_error_from_errno (pfile, fp->ihash->name);
-  free (buf);
-  return -1;
 }
 
 /* Allocate pfile->input_buffer, and initialize chartab[]
===================================================================
Index: cpplib.c
--- cpplib.c	2000/05/27 23:19:55	1.169
+++ cpplib.c	2000/05/28 05:44:47
@@ -339,7 +339,7 @@ static int
 do_define (pfile)
      cpp_reader *pfile;
 {
-  HASHNODE *node;
+  cpp_hashnode *node;
   int len;
   const U_CHAR *sym;
   cpp_toklist *list = &pfile->directbuf;
@@ -371,7 +371,7 @@ do_define (pfile)
       goto out;
     }
 
-  node = _cpp_lookup (pfile, sym, len);
+  node = cpp_lookup (pfile, sym, len);
   /* Check for poisoned identifiers now.  All other checks
      are done in cpphash.c.  */
   if (node->type == T_POISON)
@@ -681,7 +681,7 @@ do_undef (pfile)
      cpp_reader *pfile;
 {
   int len;
-  HASHNODE *hp;
+  cpp_hashnode *hp;
   U_CHAR *name;
   long here = CPP_WRITTEN (pfile);
   enum cpp_ttype token;
@@ -708,7 +708,7 @@ do_undef (pfile)
   name = pfile->token_buffer + here;
   CPP_SET_WRITTEN (pfile, here);
 
-  hp = _cpp_lookup (pfile, name, len);
+  hp = cpp_lookup (pfile, name, len);
   if (hp->type == T_VOID)
     ; /* Not defined in the first place - do nothing.  */
   else if (hp->type == T_POISON)
@@ -941,7 +941,7 @@ do_pragma_poison (pfile)
   /* Poison these symbols so that all subsequent usage produces an
      error message.  */
   U_CHAR *p;
-  HASHNODE *hp;
+  cpp_hashnode *hp;
   long written;
   size_t len;
   enum cpp_ttype token;
@@ -968,7 +968,7 @@ do_pragma_poison (pfile)
 
       p = pfile->token_buffer + written;
       len = CPP_PWRITTEN (pfile) - p;
-      hp = _cpp_lookup (pfile, p, len);
+      hp = cpp_lookup (pfile, p, len);
       if (hp->type == T_POISON)
 	;  /* It is allowed to poison the same identifier twice.  */
       else
@@ -1528,7 +1528,7 @@ do_assert (pfile)
   long old_written;
   U_CHAR *sym;
   size_t len;
-  HASHNODE *hp;
+  cpp_hashnode *hp;
   struct predicate *pred = 0;
   enum cpp_ttype type;
 
@@ -1544,7 +1544,7 @@ do_assert (pfile)
 
   sym = pfile->token_buffer + old_written;
   len = CPP_WRITTEN (pfile) - old_written;
-  hp = _cpp_lookup (pfile, sym, len);
+  hp = cpp_lookup (pfile, sym, len);
 
   if (_cpp_get_directive_token (pfile) != CPP_OPEN_PAREN)
     ERROR ("missing token-sequence in #assert");
@@ -1606,7 +1606,7 @@ do_unassert (pfile)
   long old_written;
   U_CHAR *sym;
   size_t len;
-  HASHNODE *hp;
+  cpp_hashnode *hp;
   cpp_toklist ans;
   enum cpp_ttype type;
   int specific = 0;
@@ -1620,7 +1620,7 @@ do_unassert (pfile)
 
   sym = pfile->token_buffer + old_written;
   len = CPP_WRITTEN (pfile) - old_written;
-  hp = _cpp_lookup (pfile, sym, len);
+  hp = cpp_lookup (pfile, sym, len);
 
   type = _cpp_get_directive_token (pfile);
   if (type == CPP_OPEN_PAREN)
@@ -1784,7 +1784,7 @@ cpp_defined (pfile, id, len)
      const U_CHAR *id;
      int len;
 {
-  HASHNODE *hp = _cpp_lookup (pfile, id, len);
+  cpp_hashnode *hp = cpp_lookup (pfile, id, len);
   if (hp->type == T_POISON)
     {
       cpp_error (pfile, "attempt to use poisoned `%s'", hp->name);
===================================================================
Index: cpplib.h
--- cpplib.h	2000/05/28 01:03:16	1.97
+++ cpplib.h	2000/05/28 05:44:47
@@ -35,6 +35,7 @@ typedef struct cpp_printer cpp_printer;
 typedef struct cpp_token cpp_token;
 typedef struct cpp_toklist cpp_toklist;
 typedef struct cpp_name cpp_name;
+typedef struct cpp_hashnode cpp_hashnode;
 
 /* The first two groups, apart from '=', can appear in preprocessor
    expressions.  This allows a lookup table to be implemented in
@@ -238,7 +239,7 @@ struct cpp_buffer
 
   /* If the buffer is the expansion of a macro, this points to the
      macro's hash table entry.  */
-  struct hashnode *macro;
+  struct cpp_hashnode *macro;
 
   /* Value of if_stack at start of this file.
      Used to prohibit unmatched #endif (etc) in an include file.  */
@@ -590,6 +591,61 @@ struct cpp_printer
 /* Name under which this program was invoked.  */
 extern const char *progname;
 
+/* The structure of a node in the hash table.  The hash table
+   has entries for all tokens defined by #define commands (type T_MACRO),
+   plus some special tokens like __LINE__ (these each have their own
+   type, and the appropriate code is run when that type of node is seen).
+   It does not contain control words like "#define", which are recognized
+   by a separate piece of code. */
+
+/* different flavors of hash nodes */
+enum node_type
+{
+  T_VOID = 0,	   /* no definition yet */
+  T_SPECLINE,	   /* `__LINE__' */
+  T_DATE,	   /* `__DATE__' */
+  T_FILE,	   /* `__FILE__' */
+  T_BASE_FILE,	   /* `__BASE_FILE__' */
+  T_INCLUDE_LEVEL, /* `__INCLUDE_LEVEL__' */
+  T_TIME,	   /* `__TIME__' */
+  T_STDC,	   /* `__STDC__' */
+  T_CONST,	   /* Constant string, used by `__SIZE_TYPE__' etc */
+  T_XCONST,	   /* Ditto, but the string is malloced memory */
+  T_POISON,	   /* poisoned identifier */
+  T_MACRO,	   /* object-like macro */
+  T_FMACRO,	   /* function-like macro */
+  T_IDENTITY,	   /* macro defined to itself */
+  T_EMPTY,	   /* macro defined to nothing */
+  T_ASSERTION	   /* predicate for #assert */
+};
+
+/* There is a slot in the hashnode for use by front ends when integrated
+   with cpplib.  It holds a tree (see tree.h) but we mustn't drag that
+   header into every user of cpplib.h.  cpplib does not do anything with
+   this slot except clear it when a new node is created.  */
+union tree_node;
+
+struct cpp_hashnode
+{
+  unsigned int hash;			/* cached hash value */
+  unsigned short length;		/* length of name */
+  ENUM_BITFIELD(node_type) type : 8;	/* node type */
+  char disabled;			/* macro turned off for rescan? */
+
+  union {
+    const unsigned char *cpval;		/* some predefined macros */
+    const struct object_defn *odefn;	/* #define foo bar */
+    const struct funct_defn *fdefn;	/* #define foo(x) bar(x) */
+    struct predicate *pred;		/* #assert */
+  } value;
+
+  union tree_node *fe_value;		/* front end value */
+
+  const unsigned char name[1];		/* name[length] */
+};
+
+
+
 extern void _cpp_lex_file PARAMS((cpp_reader *));
 extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **));
 extern enum cpp_ttype cpp_get_token PARAMS ((cpp_reader *));
@@ -652,6 +708,8 @@ extern int cpp_idcmp			PARAMS ((const un
 
 /* In cpphash.c */
 extern int cpp_defined			PARAMS ((cpp_reader *,
+						 const unsigned char *, int));
+extern cpp_hashnode *cpp_lookup		PARAMS ((cpp_reader *,
 						 const unsigned char *, int));
 
 /* In cppfiles.c */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]