This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: fixincludes: replacement for machine_name fix
On Wed, Jan 19, 2000 at 09:38:59PM -0800, Zack Weinberg wrote:
>
> Revised patch in a few minutes.
Here it is. I managed to find the relevant change in libc's regex.c
and merge it. That ten-line change cuts total time by a factor of
nine (4m30s -> 30s). If you let it use alloca and compile it with
optimization, that's good for another factor of three, but neither is
practical - oh well.
I also cleaned up the Makefile a bit, it was repeating things on the
command lines.
zw
* Makefile.in (fixinc.sh): Depend on specs.
* fixinc/Makefile.in: Don't repeat stuff on the compiler
command lines. Add rule for machname.h. Make fixlib.o depend
on machname.h.
* fixinc/fixlib.c (mn_get_regexps): New helper function.
* fixinc/fixlib.h: Prototype it.
* fixinc/fixtests.c (machine_name): New test.
* fixinc/fixfixes.c (machine_name): New fix.
* fixinc/inclhack.def (machine_name): Use new C test and fix.
* fixinc/gnu-regex.h: Don't define _REGEX_RE_COMP.
* fixinc/gnu-regex.c: Likewise.
(regcomp): Allocate and initialize a fastmap.
===================================================================
Index: Makefile.in
--- Makefile.in 2000/01/19 09:42:10 1.367
+++ Makefile.in 2000/01/20 06:24:11
@@ -2161,7 +2161,7 @@ FIXINCSRCDIR=$(srcdir)/fixinc
fixinc.sh: $(FIXINCSRCDIR)/mkfixinc.sh $(FIXINCSRCDIR)/fixincl.c \
$(FIXINCSRCDIR)/procopen.c $(FIXINCSRCDIR)/gnu-regex.c \
$(FIXINCSRCDIR)/server.c $(FIXINCSRCDIR)/gnu-regex.h \
- $(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def
+ $(FIXINCSRCDIR)/server.h $(FIXINCSRCDIR)/inclhack.def specs
MAKE="$(MAKE)"; srcdir=`cd $(srcdir)/fixinc; pwd` ; \
export MAKE srcdir ; \
cd ./fixinc; $(SHELL) $${srcdir}/mkfixinc.sh $(target)
===================================================================
Index: fixinc/Makefile.in
--- fixinc/Makefile.in 2000/01/19 21:41:04 1.12
+++ fixinc/Makefile.in 2000/01/20 06:24:11
@@ -77,8 +77,8 @@ gen : $(SH_TARGET) fixincl.x
$(FIOBJ): $(HDR)
fixincl: $(FIOBJ)
- @echo $(CC) -o $@ $(FIOBJ) $(LIBERTY) $(LIB) ; \
- if $(CC) -o $@ $(FIOBJ) $(LIBERTY) $(LIB) ; then : ; else \
+ @echo $(CC) $(FIXINC_DEFS) -o $@ $(FIOBJ) ; \
+ if $(CC) $(FIXINC_DEFS) -o $@ $(FIOBJ) ; then : ; else \
rm -f $@ ; (echo "#! /bin/sh" ; echo exit 1 ) > $@ ; \
chmod 777 $@ ; fi
@@ -91,12 +91,13 @@ fixtests: fixtests.c $(LIBOBJ)
$(srcdir)/fixtests.c $(LIBOBJ) $(LIB)
gnu-regex.o: gnu-regex.c
- -$(CC) $(CFLAGS) $(FIXINC_DEFS) $(INCLUDES) -DREGEX_MALLOC \
+ $(CC) $(FIXINC_DEFS) -DREGEX_MALLOC \
-c $(srcdir)/gnu-regex.c
fixincl.o : fixincl.x fixincl.c fixfixes.c fixtests.c
server.o : server.c server.h
procopen.o : procopen.c server.h
+fixlib.o: machname.h
fixincl.x: fixincl.tpl inclhack.def
cd $(srcdir) ; ./genfixes $@
@@ -139,3 +140,16 @@ install: $(TARGETS)
Makefile: Makefile.in ../config.status
cd .. \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+# Black magic: build machname.h (defines MN_NAME_PAT) from the -D names in ../specs.
+# Note dependency on ASCII. \040 = space, \011 = tab, \012 = newline.
+
+machname.h: ../specs
+ @tr -s '\040\011' '\012\012' < ../specs | \
+ sed -n 's/^.*-D\([a-zA-Z_][a-zA-Z0-9_]*\).*$$/\1/p' | sort -u | \
+ grep -v '^_[_A-Z]' > mn.T
+ @echo "Forbidden identifiers: `tr '\012' ' ' <mn.T`"
+ @sed 's/^/\\\\</; s/$$/\\\\>/' <mn.T | tr '\012' '|' | \
+ sed 's/^/#define MN_NAME_PAT "/; s/|$$/"/' > machname.h
+ @echo >> machname.h
+ @-rm -f mn.T
===================================================================
Index: fixinc/fixfixes.c
--- fixinc/fixfixes.c 2000/01/19 21:41:04 1.7
+++ fixinc/fixfixes.c 2000/01/20 06:24:12
@@ -77,7 +77,8 @@ typedef struct {
_FT_( "IO_use", IO_use_fix ) \
_FT_( "CTRL_use", CTRL_use_fix) \
_FT_( "IO_defn", IO_defn_fix ) \
- _FT_( "CTRL_defn", CTRL_defn_fix )
+ _FT_( "CTRL_defn", CTRL_defn_fix ) \
+ _FT_( "machine_name", machine_name_fix )
#define FIX_PROC_HEAD( fix ) \
@@ -545,6 +546,104 @@ FIX_PROC_HEAD( IO_defn_fix )
FIX_PROC_HEAD( CTRL_defn_fix )
{
fix_char_macro_defines (text, "CTRL");
+}
+
+
+/* Fix for machine name #ifdefs that are not in the namespace reserved
+   by the C standard.  They won't be defined if compiling with -ansi,
+   and the headers will break.  We go to some trouble to only change
+   #ifdefs where the macro is defined by GCC in non-ansi mode; this
+   minimizes the number of headers touched.  The result is written to
+   stdout, with each offending name replaced by its __name__ spelling.  */
+
+#define SCRATCHSZ 64   /* hopefully long enough */
+
+FIX_PROC_HEAD( machine_name_fix )
+{
+  regmatch_t match[2];
+  char *line, *base, *limit, *p, *q;
+  regex_t *label_re, *name_re;
+  char scratch[SCRATCHSZ];
+  size_t len;
+
+  mn_get_regexps (&label_re, &name_re, "machine_name_fix");
+  scratch[0] = '_';
+  scratch[1] = '_';
+
+  for (base = text;
+       regexec (label_re, base, 2, match, 0) == 0;
+       base = limit)
+    {
+      base += match[0].rm_eo;
+      /* We're looking at an #if, #ifdef or #ifndef.  Scan forward for
+         the next non-escaped newline.  */
+      line = limit = base;
+      do
+        {
+          limit++;
+          limit = strchr (limit, '\n');
+          if (!limit)
+            goto done;
+        }
+      while (limit[-1] == '\\');
+
+      /* If the 'name_pat' matches in between base and limit, we have
+         a bogon.  It is not worth the hassle of excluding comments
+         because comments on #if/#ifdef lines are rare, and strings on
+         such lines are illegal.  REG_NOTBOL means 'base' is not at the
+         beginning of a line, which shouldn't matter since the name_re
+         has no ^ anchor, but let's be accurate anyway.  */
+
+      for (;;)
+        {
+        again:
+          if (base == limit)
+            break;
+
+          if (regexec (name_re, base, 1, match, REG_NOTBOL))
+            goto done;  /* No remaining match in this file */
+
+          /* Match; is it on the line?  */
+          if (match[0].rm_eo > limit - base)
+            break;
+
+          p = base + match[0].rm_so;
+          base += match[0].rm_eo;
+
+          /* One more test: if on the same line we have the same string
+             with the appropriate underscores, then leave it alone.
+             We want exactly two leading and trailing underscores.  */
+          if (*p == '_')
+            {
+              len = base - p - ((*base == '_') ? 2 : 1);
+              q = p + 1;
+            }
+          else
+            {
+              len = base - p - ((*base == '_') ? 1 : 0);
+              q = p;
+            }
+          if (len + 4 > SCRATCHSZ)
+            abort ();
+          memcpy (&scratch[2], q, len);
+          len += 2;
+          scratch[len++] = '_';
+          scratch[len++] = '_';
+
+          for (q = line; q <= limit - len; q++)
+            if (*q == '_' && !strncmp (q, scratch, len))
+              goto again;
+
+          /* Copy out the text before the name, then the __name__ form.  */
+          fwrite (text, 1, p - text, stdout);
+          fwrite (scratch, 1, len, stdout);
+
+          text = base;
+        }
+    }
+ done:
+  /* Emit the rest of the text.  scratch is an automatic array: no free().  */
+  fputs (text, stdout);
}
===================================================================
Index: fixinc/fixlib.c
--- fixinc/fixlib.c 2000/01/19 21:41:04 1.6
+++ fixinc/fixlib.c 2000/01/20 06:24:12
@@ -171,3 +171,34 @@ compile_re( pat, re, match, e1, e2 )
exit (EXIT_FAILURE);
}
}
+
+/* * * * * * * * * * * * *
+ Helper routine and data for the machine_name test and fix.
+ machname.h (which supplies MN_NAME_PAT) is created by the Makefile.
+ mn_get_regexps compiles both patterns once and hands back pointers. */
+
+#include "machname.h"
+
+tSCC mn_label_pat[] = "^[ \t]*#[ \t]*(if|ifdef|ifndef)[ \t]+";
+static regex_t mn_label_re;
+
+tSCC mn_name_pat[] = MN_NAME_PAT;
+static regex_t mn_name_re;
+
+static int mn_compiled = 0; /* nonzero once both patterns are compiled */
+
+void
+mn_get_regexps( label_re, name_re, who )
+ regex_t **label_re;
+ regex_t **name_re;
+ tCC *who; /* forwarded to compile_re as its last argument */
+{
+ if (! mn_compiled)
+ {
+ compile_re (mn_label_pat, &mn_label_re, 1, "label pattern", who);
+ compile_re (mn_name_pat, &mn_name_re, 1, "name pattern", who);
+ mn_compiled++;
+ }
+ *label_re = &mn_label_re;
+ *name_re = &mn_name_re;
+}
===================================================================
Index: fixinc/fixlib.h
--- fixinc/fixlib.h 2000/01/17 21:45:29 1.5
+++ fixinc/fixlib.h 2000/01/20 06:24:12
@@ -99,4 +99,6 @@ char * load_file_data _P_(( FILE* fp ));
t_bool is_cxx_header _P_(( tCC* filename, tCC* filetext ));
void compile_re _P_(( tCC* pat, regex_t* re, int match,
tCC *e1, tCC *e2 ));
+void mn_get_regexps _P_(( regex_t** label_re, regex_t** name_re,
+ tCC *who ));
#endif /* FIXINCLUDES_FIXLIB_H */
===================================================================
Index: fixinc/fixtests.c
--- fixinc/fixtests.c 2000/01/19 21:41:04 1.9
+++ fixinc/fixtests.c 2000/01/20 06:24:12
@@ -58,7 +58,8 @@ typedef struct {
#define FIX_TEST_TABLE \
_FT_( "double_slash", double_slash_test ) \
- _FT_( "else_endif_label", else_endif_label_test )
+ _FT_( "else_endif_label", else_endif_label_test ) \
+ _FT_( "machine_name", machine_name_test )
#define TEST_FOR_FIX_PROC_HEAD( test ) \
@@ -265,6 +266,52 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
text = pz_next;
} /* for (entire file) loop */
+ return SKIP_FIX;
+}
+
+/* Decide whether the machine_name fix is needed: APPLY_FIX as soon as an
+   #if/#ifdef/#ifndef line contains a match for the name regexp.  */
+TEST_FOR_FIX_PROC_HEAD( machine_name_test )
+{
+  regex_t *label_re, *name_re;
+  regmatch_t match[2];
+  tCC *base, *limit;
+
+  mn_get_regexps(&label_re, &name_re, "machine_name_test");
+
+  for (base = text;
+       regexec (label_re, base, 2, match, 0) == 0;
+       base = limit)
+    {
+      base += match[0].rm_eo;
+      /* We're looking at an #if, #ifdef or #ifndef.  Scan forward for
+         the next non-escaped newline.  */
+      limit = base;
+      do
+        {
+          limit++;
+          limit = strchr (limit, '\n');
+          if (!limit)
+            return SKIP_FIX;
+        }
+      while (limit[-1] == '\\');
+
+      /* If the 'name_pat' matches in between base and limit, we have
+         a bogon.  It is not worth the hassle of excluding comments,
+         because comments on #if/#ifdef/#ifndef lines are rare,
+         and strings on such lines are illegal.
+         REG_NOTBOL: 'base' is not at the beginning of a line.  That shouldn't
+         matter since name_re has no ^ anchor, but let's be accurate anyway.  */
+
+      if (regexec (name_re, base, 1, match, REG_NOTBOL))
+        return SKIP_FIX;  /* No match in file - no fix needed */
+
+      /* Match; is it on the line?  Ending exactly at the newline counts.  */
+      if (match[0].rm_eo <= limit - base)
+        return APPLY_FIX;  /* Yup */
+
+      /* Otherwise, keep looking... */
+    }
return SKIP_FIX;
}
===================================================================
Index: fixinc/gnu-regex.h
--- fixinc/gnu-regex.h 1999/09/04 15:09:01 1.2
+++ fixinc/gnu-regex.h 2000/01/20 06:24:13
@@ -36,11 +36,6 @@ extern "C" {
# include <stddef.h>
#endif
-/* GDB LOCAL: define _REGEX_RE_COMP to get BSD style re_comp and re_exec */
-#ifndef _REGEX_RE_COMP
-#define _REGEX_RE_COMP
-#endif
-
/* The following two types have to be signed and unsigned integer type
wide enough to hold a value of a pointer. For most ANSI compilers
ptrdiff_t and size_t should be likely OK. Still size of these two
===================================================================
Index: fixinc/gnu-regex.c
--- fixinc/gnu-regex.c 1999/05/20 07:10:38 1.2
+++ fixinc/gnu-regex.c 2000/01/20 06:28:34
@@ -153,11 +153,6 @@ char *realloc ();
/* How many characters in the character set. */
# define CHAR_SET_SIZE 256
-/* GDB LOCAL: define _REGEX_RE_COMP to get BSD style re_comp and re_exec */
-#ifndef _REGEX_RE_COMP
-#define _REGEX_RE_COMP
-#endif
-
# ifdef SYNTAX_TABLE
extern char *re_syntax_table;
@@ -5561,7 +5556,8 @@ re_exec (s)
REG_EXTENDED bit in CFLAGS is set; otherwise, to
RE_SYNTAX_POSIX_BASIC;
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' and `fastmap_accurate' to zero;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to 1;
`re_nsub' to the number of subexpressions in PATTERN.
PATTERN is the address of the pattern string.
@@ -5600,11 +5596,8 @@ regcomp (preg, pattern, cflags)
preg->allocated = 0;
preg->used = 0;
- /* Don't bother to use a fastmap when searching. This simplifies the
- REG_NEWLINE case: if we used a fastmap, we'd have to put all the
- characters after newlines into the fastmap. This way, we just try
- every character. */
- preg->fastmap = 0;
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
if (cflags & REG_ICASE)
{
@@ -5643,6 +5636,19 @@ regcomp (preg, pattern, cflags)
/* POSIX doesn't distinguish between an unmatched open-group and an
unmatched close-group: both are REG_EPAREN. */
if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+ if (ret == REG_NOERROR && preg->fastmap)
+ {
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. */
+ if (re_compile_fastmap (preg) == -2)
+ {
+ /* Some error occurred while computing the fastmap, just forget
+ about it. */
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+ }
return (int) ret;
}
===================================================================
Index: fixinc/inclhack.def
--- fixinc/inclhack.def 2000/01/19 21:41:04 1.48
+++ fixinc/inclhack.def 2000/01/20 06:24:13
@@ -1253,93 +1253,11 @@ fix = {
/*
* Fix non-ansi machine name defines
- * File selection is split into two parts: the shell version as
- * a single patch, and the program version with each patch separate.
- * Each is substantially faster for the particular environment.
- * You have a dual maintenance problem here.
*/
fix = {
hackname = machine_name;
- /*
- * Select '#if.*' and '#elif" with possible non-ansi symbols
- * The only non-ansi symbols we know about start with one of:
- * MRS_bhimnprstuv
- * If any are added to the substitution list, then add it to
- * the selection list as well. Hopefully we can avoid names
- * starting with "d" and "l", because this pattern would then
- * match "defined" and "lint" as well. I suppose we could add
- * a "bypass = lint" if we had to though.
- *
- * The fixinc_eol stuff is to work around a bug in the sed
- */
- select = "^#[ \t]*(if|elif).*"
- "[^a-zA-Z0-9_](_*[MSRrhim]|[Mbimnpstuv])[a-zA-Z0-9_]";
- exesel = "^#[ \t]*(if|elif).*[^a-zA-Z0-9_]"
- "("
- "M32"
- "|_*MIPSE[LB]"
- "|_*SYSTYPE_[A-Z0-9]"
- "|_*[Rr][34]000"
- "|_*host_mips"
- "|_*i386"
- "|_*mips"
- "|bsd4"
- "|is68k"
- "|m[68]8k"
- "|mc680"
- "|news"
- "|ns32000"
- "|pdp11"
- "|pyr"
- "|sel"
- "|sony_news"
- "|sparc"
- "|sun"
- "|tahoe"
- "|tower"
- "|u370"
- "|u3b"
- "|unix"
- "|vax"
- ")";
-
- sed = ":loop\n"
- '/\\\\$/' "N\n"
- 's/\\\\$/\\\\+++fixinc_eol+++/' "\n"
- '/\\\\$/' "b loop\n"
- 's/\\\\+++fixinc_eol+++/\\\\/g' "\n"
-
- "/#[\t ]*[el]*if/ {\n"
- "\ts/[a-zA-Z0-9_][a-zA-Z0-9_]*/ & /g\n"
-
- "\ts/ M32 / __M32__ /g\n"
- "\ts/ _*MIPSE\\([LB]\\) / __MIPSE\\1__ /g\n"
- "\ts/ _*SYSTYPE_\\([A-Z0-9]*\\) / __SYSTYPE_\\1__ /g\n"
- "\ts/ _*\\([Rr][34]\\)000 / __\\1000__ /g\n"
- "\ts/ _*host_mips / __host_mips__ /g\n"
- "\ts/ _*i386 / __i386__ /g\n"
- "\ts/ _*mips / __mips__ /g\n"
- "\ts/ bsd4\\([0-9]\\) / __bsd4\\1__ /g\n"
- "\ts/ is68k / __is68k__ /g\n"
- "\ts/ m68k / __m68k__ /g\n"
- "\ts/ m88k / __m88k__ /g\n"
- "\ts/ mc680\\([0-9]\\)0 / __mc680\\10__ /g\n"
- "\ts/ news\\([0-9]*\\) / __news\\1__ /g\n"
- "\ts/ ns32000 / __ns32000__ /g\n"
- "\ts/ pdp11 / __pdp11__ /g\n"
- "\ts/ pyr / __pyr__ /g\n"
- "\ts/ sel / __sel__ /g\n"
- "\ts/ sony_news / __sony_news__ /g\n"
- "\ts/ sparc / __sparc__ /g\n"
- "\ts/ sun\\([a-z0-9]*\\) / __sun\\1__ /g\n"
- "\ts/ tahoe / __tahoe__ /g\n"
- "\ts/ tower\\([_0-9]*\\) / __tower\\1__ /g\n"
- "\ts/ u370 / __u370__ /g\n"
- "\ts/ u3b\\([0-9]*\\) / __u3b\\1__ /g\n"
- "\ts/ unix / __unix__ /g\n"
- "\ts/ vax / __vax__ /g\n"
-
- "\ts/ \\([a-zA-Z0-9_][a-zA-Z0-9_]*\\) /\\1/g\n\t}";
+ c_test = machine_name;
+ c_fix = machine_name;
};