This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: fixincludes: replacement for machine_name fix


On Wed, Jan 19, 2000 at 09:38:59PM -0800, Zack Weinberg wrote:
> 
> Revised patch in a few minutes.

Here it is.  I managed to find the relevant change in libc's regex.c
and merge it.  That ten-line change cuts total time by a factor of
nine (4m30s -> 30s).  If you let it use alloca and compile it with
optimization, that's good for another factor of three, but neither is
practical - oh well.

I also cleaned up the Makefile a bit, it was repeating things on the
command lines.

zw

	* Makefile.in (fixinc.sh): Depend on specs.
	* fixinc/Makefile.in: Don't repeat stuff on the compiler
	command lines.  Add rule for machname.h.  Make fixlib.o depend
	on machname.h.

	* fixinc/fixlib.c (mn_get_regexps): New helper function.
	* fixinc/fixlib.h: Prototype it.
	* fixinc/fixtests.c (machine_name): New test.
	* fixinc/fixfixes.c (machine_name): New fix.
	* fixinc/inclhack.def (machine_name): Use new C test and fix.

	* fixinc/gnu-regex.h: Don't define _REGEX_RE_COMP.
	* fixinc/gnu-regex.c: Likewise.
	(regcomp): Allocate and initialize a fastmap.

===================================================================
Index: Makefile.in
--- Makefile.in	2000/01/19 09:42:10	1.367
+++ Makefile.in	2000/01/20 06:24:11
@@ -2161,7 +2161,7 @@ FIXINCSRCDIR=$(srcdir)/fixinc
 fixinc.sh: $(FIXINCSRCDIR)/mkfixinc.sh $(FIXINCSRCDIR)/fixincl.c \
 	$(FIXINCSRCDIR)/procopen.c $(FIXINCSRCDIR)/gnu-regex.c \
 	$(FIXINCSRCDIR)/server.c $(FIXINCSRCDIR)/gnu-regex.h \
-	$(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def
+	$(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def specs
 	MAKE="$(MAKE)"; srcdir=`cd $(srcdir)/fixinc; pwd` ; \
 	export MAKE srcdir ; \
 	cd ./fixinc; $(SHELL) $${srcdir}/mkfixinc.sh $(target)
===================================================================
Index: fixinc/Makefile.in
--- fixinc/Makefile.in	2000/01/19 21:41:04	1.12
+++ fixinc/Makefile.in	2000/01/20 06:24:11
@@ -77,8 +77,8 @@ gen : $(SH_TARGET) fixincl.x
 $(FIOBJ): $(HDR)
 
 fixincl: $(FIOBJ)
-	@echo $(CC) -o $@ $(FIOBJ) $(LIBERTY) $(LIB) ; \
-	if $(CC) -o $@ $(FIOBJ) $(LIBERTY) $(LIB) ; then : ; else \
+	@echo $(CC) $(FIXINC_DEFS) -o $@ $(FIOBJ) ; \
+	if $(CC) $(FIXINC_DEFS) -o $@ $(FIOBJ) ; then : ; else \
 	rm -f $@ ; (echo "#! /bin/sh" ; echo exit 1 ) > $@ ; \
 	chmod 777 $@ ; fi
 
@@ -91,12 +91,13 @@ fixtests: fixtests.c $(LIBOBJ)
 		$(srcdir)/fixtests.c $(LIBOBJ) $(LIB)
 
 gnu-regex.o: gnu-regex.c
-	-$(CC) $(CFLAGS) $(FIXINC_DEFS) $(INCLUDES) -DREGEX_MALLOC \
+	$(CC) $(FIXINC_DEFS) -DREGEX_MALLOC \
 		-c $(srcdir)/gnu-regex.c
 
 fixincl.o : fixincl.x fixincl.c fixfixes.c fixtests.c
 server.o : server.c server.h
 procopen.o : procopen.c server.h
+fixlib.o: machname.h
 
 fixincl.x: fixincl.tpl inclhack.def
 	cd $(srcdir) ; ./genfixes $@
@@ -139,3 +140,16 @@ install: $(TARGETS)
 Makefile: Makefile.in ../config.status
 	cd .. \
 	  && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+# Black magic: collect every -D<name> in ../specs whose name is outside the
+# reserved namespace (not __* or _[A-Z]*) and emit them as MN_NAME_PAT.
+# Note dependency on ASCII. \040 = space, \011 = tab, \012 = newline.
+machname.h: ../specs
+	@tr -s '\040\011' '\012\012' < ../specs | \
+	    sed -n 's/^.*-D\([a-zA-Z_][a-zA-Z0-9_]*\).*$$/\1/p' | sort -u | \
+	    grep -v '^_[_A-Z]' > mn.T
+	@echo "Forbidden identifiers: `tr '\012' ' ' <mn.T`"
+	@sed 's/^/\\\\</; s/$$/\\\\>/' <mn.T | tr '\012' '|' | \
+	     sed 's/^/#define MN_NAME_PAT "/; s/|$$/"/' > machname.h
+	@echo >> machname.h
+	@-rm -f mn.T
===================================================================
Index: fixinc/fixfixes.c
--- fixinc/fixfixes.c	2000/01/19 21:41:04	1.7
+++ fixinc/fixfixes.c	2000/01/20 06:24:12
@@ -77,7 +77,8 @@ typedef struct {
   _FT_( "IO_use",	    IO_use_fix ) \
   _FT_( "CTRL_use",	    CTRL_use_fix) \
   _FT_( "IO_defn",	    IO_defn_fix ) \
-  _FT_( "CTRL_defn",	    CTRL_defn_fix )
+  _FT_( "CTRL_defn",	    CTRL_defn_fix ) \
+  _FT_( "machine_name",	    machine_name_fix )
 
 
 #define FIX_PROC_HEAD( fix ) \
@@ -545,6 +546,112 @@ FIX_PROC_HEAD( IO_defn_fix )
 FIX_PROC_HEAD( CTRL_defn_fix )
 {
   fix_char_macro_defines (text, "CTRL");
+}
+
+
+/* Fix for machine name #ifdefs that are not in the namespace reserved
+   by the C standard.  They won't be defined if compiling with -ansi,
+   and the headers will break.  We go to some trouble to only change
+   #ifdefs where the macro is defined by GCC in non-ansi mode; this
+   minimizes the number of headers touched.
+
+   The input is the string 'text'.  Each offending name found on a
+   conditional directive line is replaced by the same name wrapped in
+   two leading and two trailing underscores, unless that wrapped form
+   already appears on the line.  The result is written to stdout.  */
+
+#define SCRATCHSZ 64   /* hopefully long enough */
+
+FIX_PROC_HEAD( machine_name_fix )
+{
+  regmatch_t match[2];
+  char *line, *base, *limit, *p, *q;
+  regex_t *label_re, *name_re;
+  char scratch[SCRATCHSZ];
+  size_t len;
+
+  mn_get_regexps (&label_re, &name_re, "machine_name_fix");
+  scratch[0] = '_';
+  scratch[1] = '_';
+
+  for (base = text;
+       regexec (label_re, base, 2, match, 0) == 0;
+       base = limit)
+    {
+      base += match[0].rm_eo;
+      /* We're looking at an #if or #ifdef.  Scan forward for the
+	 next non-escaped newline.  */
+      line = limit = base;
+      do
+	{
+	  limit++;
+	  limit = strchr (limit, '\n');
+	  if (!limit)
+	    goto done;
+	}
+      while (limit[-1] == '\\');
+
+      /* If the 'name_pat' matches in between base and limit, we have
+	 a bogon.  It is not worth the hassle of excluding comments
+	 because comments on #if/#ifdef lines are rare, and strings on
+	 such lines are illegal.
+
+	 REG_NOTBOL means 'base' is not at the beginning of a line, which
+	 shouldn't matter since the name_re has no ^ anchor, but let's
+	 be accurate anyway.  */
+
+      for (;;)
+	{
+	again:
+	  if (base == limit)
+	    break;
+
+	  if (regexec (name_re, base, 1, match, REG_NOTBOL))
+	    goto done;  /* No remaining match in this file */
+
+	  /* Match; is it on the line?  */
+	  if (match[0].rm_eo > limit - base)
+	    break;
+
+	  p = base + match[0].rm_so;
+	  base += match[0].rm_eo;
+
+	  /* One more test: if on the same line we have the same string
+	     with the appropriate underscores, then leave it alone.
+	     We want exactly two leading and trailing underscores.  */
+	  if (*p == '_')
+	    {
+	      len = base - p - ((*base == '_') ? 2 : 1);
+	      q = p + 1;
+	    }
+	  else
+	    {
+	      len = base - p - ((*base == '_') ? 1 : 0);
+	      q = p;
+	    }
+	  /* scratch must hold the name plus four underscores.  */
+	  if (len + 4 > SCRATCHSZ)
+	    abort ();
+	  memcpy (&scratch[2], q, len);
+	  len += 2;
+	  scratch[len++] = '_';
+	  scratch[len++] = '_';
+
+	  for (q = line; q <= limit - len; q++)
+	    if (*q == '_' && !strncmp (q, scratch, len))
+	      goto again;
+
+	  /* Copy the unchanged text before the name, then the fixed name.  */
+	  fwrite (text, 1, p - text, stdout);
+	  fwrite (scratch, 1, len, stdout);
+
+	  text = base;
+	}
+    }
+ done:
+  /* Emit everything after the last replacement.  'scratch' is an
+     automatic array, so it must not be passed to free().  */
+  fputs (text, stdout);
 }
 
 
===================================================================
Index: fixinc/fixlib.c
--- fixinc/fixlib.c	2000/01/19 21:41:04	1.6
+++ fixinc/fixlib.c	2000/01/20 06:24:12
@@ -171,3 +171,34 @@ compile_re( pat, re, match, e1, e2 )
       exit (EXIT_FAILURE);
     }
 }
+
+/* * * * * * * * * * * * *
+
+   Helper routine and data for the machine_name test and fix.
+   machname.h is created by black magic in the Makefile.  */
+
+#include "machname.h"
+
+tSCC mn_label_pat[] = "^[ \t]*#[ \t]*(if|ifdef|ifndef)[ \t]+";
+static regex_t mn_label_re;
+
+tSCC mn_name_pat[] = MN_NAME_PAT;
+static regex_t mn_name_re;
+
+static int mn_compiled = 0;
+
+void
+mn_get_regexps( label_re, name_re, who )
+     regex_t **label_re;   /* out: receives the address of mn_label_re */
+     regex_t **name_re;    /* out: receives the address of mn_name_re */
+     tCC *who;             /* forwarded to compile_re as its last argument */
+{
+  if (! mn_compiled)   /* compile both patterns on the first call only */
+    {
+      compile_re (mn_label_pat, &mn_label_re, 1, "label pattern", who);
+      compile_re (mn_name_pat, &mn_name_re, 1, "name pattern", who);
+      mn_compiled++;
+    }
+  *label_re = &mn_label_re;   /* callers share these file-static buffers */
+  *name_re = &mn_name_re;
+}
===================================================================
Index: fixinc/fixlib.h
--- fixinc/fixlib.h	2000/01/17 21:45:29	1.5
+++ fixinc/fixlib.h	2000/01/20 06:24:12
@@ -99,4 +99,6 @@ char * load_file_data _P_(( FILE* fp ));
 t_bool is_cxx_header  _P_(( tCC* filename, tCC* filetext ));
 void   compile_re     _P_(( tCC* pat, regex_t* re, int match,
 			    tCC *e1, tCC *e2 ));
+void   mn_get_regexps _P_(( regex_t** label_re, regex_t** name_re,
+			    tCC *who ));
 #endif /* FIXINCLUDES_FIXLIB_H */
===================================================================
Index: fixinc/fixtests.c
--- fixinc/fixtests.c	2000/01/19 21:41:04	1.9
+++ fixinc/fixtests.c	2000/01/20 06:24:12
@@ -58,7 +58,8 @@ typedef struct {
 
 #define FIX_TEST_TABLE \
   _FT_( "double_slash",     double_slash_test ) \
-  _FT_( "else_endif_label", else_endif_label_test )
+  _FT_( "else_endif_label", else_endif_label_test ) \
+  _FT_( "machine_name",     machine_name_test )
 
 
 #define TEST_FOR_FIX_PROC_HEAD( test ) \
@@ -265,6 +266,52 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
       text = pz_next;
     } /* for (entire file) loop */
 
+  return SKIP_FIX;
+}
+
+TEST_FOR_FIX_PROC_HEAD( machine_name_test )
+{
+  regex_t *label_re, *name_re;
+  regmatch_t match[2];
+  tCC *base, *limit;
+  /* Return APPLY_FIX if name_re matches on any #if/#ifdef/#ifndef line.  */
+  mn_get_regexps(&label_re, &name_re, "machine_name_test");
+
+  for (base = text;
+       regexec (label_re, base, 2, match, 0) == 0;
+       base = limit)
+    {
+      base += match[0].rm_eo;
+      /* We're looking at an #if, #ifdef or #ifndef.  Scan forward for
+	 the next non-escaped newline.  */
+      limit = base;
+      do
+	{
+	  limit++;
+	  limit = strchr (limit, '\n');
+	  if (!limit)
+	    return SKIP_FIX;  /* No newline left - nothing more to test */
+	}
+      while (limit[-1] == '\\');
+
+      /* If the 'name_pat' matches in between base and limit, we have
+	 a bogon.  It is not worth the hassle of excluding comments,
+	 because comments on #if/#ifdef/#ifndef lines are rare,
+	 and strings on such lines are illegal.
+
+	 REG_NOTBOL means 'base' is not at the beginning of a line, which
+	 shouldn't matter since the name_re has no ^ anchor, but let's
+	 be accurate anyway.  */
+
+      if (regexec (name_re, base, 1, match, REG_NOTBOL))
+	return SKIP_FIX;  /* No match in file - no fix needed */
+
+      /* Match; is it on the line?  */
+      if (match[0].rm_eo < limit - base)
+	return APPLY_FIX;  /* Yup */
+
+      /* Otherwise, keep looking... */
+    }
   return SKIP_FIX;
 }
 
===================================================================
Index: fixinc/gnu-regex.h
--- fixinc/gnu-regex.h	1999/09/04 15:09:01	1.2
+++ fixinc/gnu-regex.h	2000/01/20 06:24:13
@@ -36,11 +36,6 @@ extern "C" {
 # include <stddef.h>
 #endif
 
-/* GDB LOCAL: define _REGEX_RE_COMP to get BSD style re_comp and re_exec */
-#ifndef _REGEX_RE_COMP
-#define _REGEX_RE_COMP
-#endif
-
 /* The following two types have to be signed and unsigned integer type
    wide enough to hold a value of a pointer.  For most ANSI compilers
    ptrdiff_t and size_t should be likely OK.  Still size of these two
===================================================================
Index: fixinc/gnu-regex.c
--- fixinc/gnu-regex.c	1999/05/20 07:10:38	1.2
+++ fixinc/gnu-regex.c	2000/01/20 06:28:34
@@ -153,11 +153,6 @@ char *realloc ();
 /* How many characters in the character set.  */
 # define CHAR_SET_SIZE 256
 
-/* GDB LOCAL: define _REGEX_RE_COMP to get BSD style re_comp and re_exec */
-#ifndef _REGEX_RE_COMP
-#define _REGEX_RE_COMP
-#endif
-
 # ifdef SYNTAX_TABLE
 
 extern char *re_syntax_table;
@@ -5561,7 +5556,8 @@ re_exec (s)
        REG_EXTENDED bit in CFLAGS is set; otherwise, to
        RE_SYNTAX_POSIX_BASIC;
      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
-     `fastmap' and `fastmap_accurate' to zero;
+     `fastmap' to an allocated space for the fastmap;
+     `fastmap_accurate' to 1;
      `re_nsub' to the number of subexpressions in PATTERN.
 
    PATTERN is the address of the pattern string.
@@ -5600,11 +5596,8 @@ regcomp (preg, pattern, cflags)
   preg->allocated = 0;
   preg->used = 0;
 
-  /* Don't bother to use a fastmap when searching.  This simplifies the
-     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
-     characters after newlines into the fastmap.  This way, we just try
-     every character.  */
-  preg->fastmap = 0;
+  /* Try to allocate space for the fastmap.  */
+  preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
 
   if (cflags & REG_ICASE)
     {
@@ -5643,6 +5636,19 @@ regcomp (preg, pattern, cflags)
   /* POSIX doesn't distinguish between an unmatched open-group and an
      unmatched close-group: both are REG_EPAREN.  */
   if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+  if (ret == REG_NOERROR && preg->fastmap)
+    {
+      /* Compute the fastmap now, since regexec cannot modify the pattern
+        buffer.  */
+      if (re_compile_fastmap (preg) == -2)
+       {
+         /* Some error occured while computing the fastmap, just forget
+            about it.  */
+         free (preg->fastmap);
+         preg->fastmap = NULL;
+       }
+    }
 
   return (int) ret;
 }
===================================================================
Index: fixinc/inclhack.def
--- fixinc/inclhack.def	2000/01/19 21:41:04	1.48
+++ fixinc/inclhack.def	2000/01/20 06:24:13
@@ -1253,93 +1253,11 @@ fix = {
 
 /*
  *  Fix non-ansi machine name defines
- *  File selection is split into two parts:  the shell version as
- *  a single patch, and the program version with each patch separate.
- *  Each is substantially faster for the particular environment.
- *  You have a dual maintenance problem here.
  */
 fix = {
     hackname = machine_name;
-    /*
-     *  Select '#if.*' and '#elif" with possible non-ansi symbols
-     *  The only non-ansi symbols we know about start with one of:
-     *     MRS_bhimnprstuv
-     *  If any are added to the substitution list, then add it to
-     *  the selection list as well.  Hopefully we can avoid names
-     *  starting with "d" and "l", because this pattern would then
-     *  match "defined" and "lint" as well.  I suppose we could add
-     *  a "bypass = lint" if we had to though.
-     *
-     * The fixinc_eol stuff is to work around a bug in the sed
-     */
-    select = "^#[ \t]*(if|elif).*"
-             "[^a-zA-Z0-9_](_*[MSRrhim]|[Mbimnpstuv])[a-zA-Z0-9_]";
-    exesel = "^#[ \t]*(if|elif).*[^a-zA-Z0-9_]"
-             "("
-                  "M32"
-                 "|_*MIPSE[LB]"
-                 "|_*SYSTYPE_[A-Z0-9]"
-                 "|_*[Rr][34]000"
-                 "|_*host_mips"
-                 "|_*i386"
-                 "|_*mips"
-                 "|bsd4"
-                 "|is68k"
-                 "|m[68]8k"
-                 "|mc680"
-                 "|news"
-                 "|ns32000"
-                 "|pdp11"
-                 "|pyr"
-                 "|sel"
-                 "|sony_news"
-                 "|sparc"
-                 "|sun"
-                 "|tahoe"
-                 "|tower"
-                 "|u370"
-                 "|u3b"
-                 "|unix"
-                 "|vax"
-             ")";
-
-    sed =      ":loop\n"
-               '/\\\\$/'                       "N\n"
-               's/\\\\$/\\\\+++fixinc_eol+++/' "\n"
-               '/\\\\$/'                       "b loop\n"
-               's/\\\\+++fixinc_eol+++/\\\\/g' "\n"
-
-          "/#[\t ]*[el]*if/ {\n"
-                "\ts/[a-zA-Z0-9_][a-zA-Z0-9_]*/ & /g\n"
-
-                "\ts/ M32 / __M32__ /g\n"
-                "\ts/ _*MIPSE\\([LB]\\) / __MIPSE\\1__ /g\n"
-                "\ts/ _*SYSTYPE_\\([A-Z0-9]*\\) / __SYSTYPE_\\1__ /g\n"
-                "\ts/ _*\\([Rr][34]\\)000 / __\\1000__ /g\n"
-                "\ts/ _*host_mips / __host_mips__ /g\n"
-                "\ts/ _*i386 / __i386__ /g\n"
-                "\ts/ _*mips / __mips__ /g\n"
-                "\ts/ bsd4\\([0-9]\\) / __bsd4\\1__ /g\n"
-                "\ts/ is68k / __is68k__ /g\n"
-                "\ts/ m68k / __m68k__ /g\n"
-                "\ts/ m88k / __m88k__ /g\n"
-                "\ts/ mc680\\([0-9]\\)0 / __mc680\\10__ /g\n"
-                "\ts/ news\\([0-9]*\\) / __news\\1__ /g\n"
-                "\ts/ ns32000 / __ns32000__ /g\n"
-                "\ts/ pdp11 / __pdp11__ /g\n"
-                "\ts/ pyr / __pyr__ /g\n"
-                "\ts/ sel / __sel__ /g\n"
-                "\ts/ sony_news / __sony_news__ /g\n"
-                "\ts/ sparc / __sparc__ /g\n"
-                "\ts/ sun\\([a-z0-9]*\\) / __sun\\1__ /g\n"
-                "\ts/ tahoe / __tahoe__ /g\n"
-                "\ts/ tower\\([_0-9]*\\) / __tower\\1__ /g\n"
-                "\ts/ u370 / __u370__ /g\n"
-                "\ts/ u3b\\([0-9]*\\) / __u3b\\1__ /g\n"
-                "\ts/ unix / __unix__ /g\n"
-                "\ts/ vax / __vax__ /g\n"
-
-                "\ts/ \\([a-zA-Z0-9_][a-zA-Z0-9_]*\\) /\\1/g\n\t}";
+    c_test   = machine_name;
+    c_fix    = machine_name;
 };
 
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]