fixincludes grabbag, revised _again_

Zack Weinberg zack@wolery.cumb.org
Wed Jan 19 11:41:00 GMT 2000


On Wed, Jan 19, 2000 at 07:44:08AM -0800, Bruce Korb wrote:
> Zack wrote:
> > Here is yet another revision of the fixincludes grabbag.  The change
> > this time around is a brand new shiny way to do the machine_name fix.
> > As Bruce suggested, it only touches macros that are defined by gcc
> > without -ansi, but are not defined with that switch.
> 
> I did not intend to make that suggestion, but that is an interesting
> idea.  I like it as long as all the appropriate ports do, in fact,
> define the non-ansi symbols that are used in broken headers.
> I do not know that for a fact.

I think it is a fair assumption to make, but admittedly it'd be nice
to have it verified.

> It would be nice if true, but I still
> am not completely certain of how to obtain the information in a
> double-cross build environment.  (How would one get xgcc usage
> information at fixincl compile time?)

How is the specs file generated in a double cross?  The patch extracts
all the relevant information from there.

[...]
> The last patch you sent out looked pretty much okay.
> Please apply those and separate out the rest.  Especially the
> change to gcc.c, since I cannot approve changes to that one.

Well, I have lingering nerves about the ioctl fixes, but OK.  Here is
the patch I will commit if the build I'm running right now completes
successfully.

zw

	* fixinc/Makefile.in: Correct dependencies of fixincl and fixincl.o.
	* fixfixes.c (IO_use, CTRL_use, IO_defn, CTRL_defn): New fixes.
	(fix_char_macro_defines, fix_char_macro_uses): New functions.
	* fixlib.c (is_cxx_header): Do the text scan with a regexp.
	Recognize Emacs mode markers also.
	* fixtests.c (else_endif_label): Fix bug in recognition of
	C++ comments in C++ headers.  Call is_cxx_header only if
	necessary.
	* inclhack.def (avoid_bool): Add select for the problem and
	bypass for ncurses.
	(bsd43_io_macros, io_def_quotes, ioctl_fix_ctrl): Replace with...
	(io_def_quotes, io_use_quotes, ctrl_def_quotes, ctrl_use_quotes):
	... these, which use the new C fixes.
	(math_exception): Escape literal '+' in bypass expression.

===================================================================
Index: Makefile.in
--- Makefile.in	1999/10/19 13:22:45	1.11
+++ Makefile.in	2000/01/19 19:26:19
@@ -76,7 +76,7 @@ gen : $(SH_TARGET) fixincl.x
 
 $(FIOBJ): $(HDR)
 
-fixincl: $(FIOBJ) fixfixes fixtests
+fixincl: $(FIOBJ)
 	@echo $(CC) -o $@ $(FIOBJ) $(LIBERTY) $(LIB) ; \
 	if $(CC) -o $@ $(FIOBJ) $(LIBERTY) $(LIB) ; then : ; else \
 	rm -f $@ ; (echo "#! /bin/sh" ; echo exit 1 ) > $@ ; \
@@ -94,7 +94,7 @@ gnu-regex.o: gnu-regex.c
 	-$(CC) $(CFLAGS) $(FIXINC_DEFS) $(INCLUDES) -DREGEX_MALLOC \
 		-c $(srcdir)/gnu-regex.c
 
-fixincl.o : fixincl.x fixincl.c
+fixincl.o : fixincl.x fixincl.c fixfixes.c fixtests.c
 server.o : server.c server.h
 procopen.o : procopen.c server.h
 
===================================================================
Index: fixfixes.c
--- fixfixes.c	2000/01/17 21:45:28	1.6
+++ fixfixes.c	2000/01/19 19:26:19
@@ -73,7 +73,11 @@ typedef struct {
 
 #define FIXUP_TABLE \
   _FT_( "no_double_slash",  double_slash_fix ) \
-  _FT_( "else_endif_label", else_endif_label_fix )
+  _FT_( "else_endif_label", else_endif_label_fix ) \
+  _FT_( "IO_use",	    IO_use_fix ) \
+  _FT_( "CTRL_use",	    CTRL_use_fix) \
+  _FT_( "IO_defn",	    IO_defn_fix ) \
+  _FT_( "CTRL_defn",	    CTRL_defn_fix )
 
 
 #define FIX_PROC_HEAD( fix ) \
@@ -353,6 +357,196 @@ FIX_PROC_HEAD( else_endif_label_fix )
 
   return;
 }
+
+/* Scan TEXT (the whole input file) for object-like macro definitions
+   whose replacement calls a STR macro the traditional way:
+
+   #define TIOCCONS _IO(T, 12)
+
+   and quote the single-letter argument as the C standard requires:
+
+   #define TIOCCONS _IO('T', 12)
+
+   The result goes to stdout.  STR is "IO", "CTRL", etc.; it may carry a
+   prefix and uppercase suffix.  fix_char_macro_defines fixes _IO itself.  */
+void
+fix_char_macro_uses (text, str)
+     const char *text;
+     const char *str;
+{
+  /* This regexp looks for a traditional-syntax #define (# in column 1)
+     of an object-like macro.  The macro name may contain underscores.  */
+  static const char pat[] =
+    "^#[ \t]*define[ \t]+[_A-Za-z][_A-Za-z0-9]*[ \t]+";
+  static regex_t re;
+
+  regmatch_t rm[1];
+  const char *p, *limit;
+  size_t len = strlen (str);
+
+  compile_re (pat, &re, 1, "macro pattern", "fix_char_macro_uses");
+
+  for (p = text;
+       regexec (&re, p, 1, rm, 0) == 0;
+       p = limit + 1)
+    {
+      /* p + rm[0].rm_eo is the first character of the macro replacement.
+	 Find the end of the macro replacement (honoring backslash-newline
+	 continuations), and the STR we were sent to look for within it.  */
+      p += rm[0].rm_eo;
+      limit = p - 1;
+      do
+	{
+	  limit = strchr (limit + 1, '\n');
+	  if (!limit)
+	    goto done;
+	}
+      while (limit[-1] == '\\');
+
+      do
+	{
+	  if (*p == str[0] && !strncmp (p+1, str+1, len-1))
+	    goto found;
+	}
+      while (++p < limit - len);
+      /* Hit end of line.  */
+      continue;
+
+    found:
+      /* Found STR.  A macro needing the fix continues with whitespace or
+	 uppercase letters, then an open paren, then a single letter.  */
+      while ((isspace ((unsigned char) *p) || isupper ((unsigned char) *p))
+	     && p < limit) p++;
+      if (*p++ != '(')
+	continue;
+      if (!isalpha ((unsigned char) *p))
+	continue;
+      if (isalnum ((unsigned char) p[1]) || p[1] == '_')
+	continue;
+
+      /* Splat all preceding text into the output buffer,
+	 quote the character at p, then proceed.  */
+      fwrite (text, 1, p - text, stdout);
+      putchar ('\'');
+      putchar (*p);
+      putchar ('\'');
+      text = p + 1;
+    }
+ done:
+  fputs (text, stdout);
+}
+
+/* Scan TEXT (the whole input file) for definitions of STR macros that
+   quote their single-letter parameter the traditional way:
+
+   #define _IO(x, y) ('x'<<16+y)
+
+   and strip the quotes, as the C standard requires:
+
+   #define _IO(x, y) (x<<16+y)
+
+   The result goes to stdout.  STR is "IO", "CTRL", etc.; it may carry a
+   prefix and uppercase suffix.  fix_char_macro_uses fixes the callers.  */
+void
+fix_char_macro_defines (text, str)
+     const char *text;
+     const char *str;
+{
+  /* This regexp looks for any traditional-syntax #define (# in column 1).  */
+  static const char pat[] =
+    "^#[ \t]*define[ \t]+";
+  static regex_t re;
+
+  regmatch_t rm[1];
+  const char *p, *limit;
+  size_t len = strlen (str);
+  char arg;
+
+  compile_re (pat, &re, 1, "macro pattern", "fix_char_macro_defines");
+
+  for (p = text;
+       regexec (&re, p, 1, rm, 0) == 0;
+       p = limit + 1)
+    {
+      /* p + rm[0].rm_eo is the first character of the macro name.
+	 Find the end of the macro replacement, then step through the
+	 name one character at a time looking for STR.  */
+      p += rm[0].rm_eo;
+      limit = p - 1;
+      do
+	{
+	  limit = strchr (limit + 1, '\n');
+	  if (!limit)
+	    goto done;
+	}
+      while (limit[-1] == '\\');
+
+      do
+	{
+	  if (*p == str[0] && !strncmp (p+1, str+1, len-1))
+	    goto found;
+	}
+      while (++p < limit && (isalnum ((unsigned char) *p) || *p == '_'));
+      /* Hit end of macro name without finding the string.  */
+      continue;
+
+    found:
+      /* Found STR in this macro name.  If the macro needs fixing,
+	 there may be a few uppercase letters, then there will be an
+	 open paren with _no_ intervening whitespace, and then a
+	 single letter.  */
+      while (isupper ((unsigned char) *p) && p < limit) p++;
+      if (*p++ != '(')
+	continue;
+      if (!isalpha ((unsigned char) *p))
+	continue;
+      if (isalnum ((unsigned char) p[1]) || p[1] == '_')
+	continue;
+
+      /* The character at P is the one to look for in the following
+	 text.  */
+      arg = *p;
+      p += 2;
+
+      while (p < limit)
+	{
+	  if (p[-1] == '\'' && p[0] == arg && p[1] == '\'')
+	    {
+	      /* Remove the quotes from this use of ARG.  */
+	      p--;
+	      fwrite (text, 1, p - text, stdout);
+	      putchar (arg);
+	      p += 3;
+	      text = p;
+	    }
+	  else
+	    p++;
+	}
+    }
+ done:
+  fputs (text, stdout);
+}
+
+/* Entry points named in FIXUP_TABLE.  Prefixed variants (_IO, DESIO, ...)
+   need no extra code: the fixers look for STR at every position.  */
+FIX_PROC_HEAD( IO_use_fix )
+{
+  fix_char_macro_uses (text, "IO");
+}
+FIX_PROC_HEAD( CTRL_use_fix )
+{
+  fix_char_macro_uses (text, "CTRL");
+}
+
+FIX_PROC_HEAD( IO_defn_fix )
+{
+  fix_char_macro_defines (text, "IO");
+}
+FIX_PROC_HEAD( CTRL_defn_fix )
+{
+  fix_char_macro_defines (text, "CTRL");
+}
+
 
 /* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
 
===================================================================
Index: fixlib.c
--- fixlib.c	2000/01/17 21:45:29	1.5
+++ fixlib.c	2000/01/19 19:26:19
@@ -112,43 +112,27 @@ is_cxx_header (fname, text)
         }
     } not_cxx_name:;
 
-  /* Or it might contain the phrase 'extern "C++"' */
-  for (;;)
+  /* Or it might contain one of several phrases which indicate C++ code.
+     Currently recognized are:
+     extern "C++"
+     -*- (Mode: )? C++ -*-   (emacs mode marker)
+     template <
+   */
     {
-      tSCC zExtern[]   = "extern";
-      tSCC zExtCxx[]   = "\"C++\"";
-      tSCC zTemplate[] = "template";
+      tSCC cxxpat[] = "\
+extern[ \t]*\"C\\+\\+\"|\
+-\\*-[ \t]*([mM]ode:[ \t]*)?[cC]\\+\\+[; \t]*-\\*-|\
+template[ \t]*<";
+      static regex_t cxxre;
+      static int compiled;
 
-      switch (*(text++))
-        {
-        case 'e':
-          /*  Check for "extern \"C++\"" */
-          if (strncmp (text, zExtern+1, sizeof( zExtern )-2) != 0)
-            break;
-          text += sizeof( zExtern )-2;
-          if (! isspace( *(text++)) )
-            break;
-          while (isspace( *text ))  text++;
-          if (strncmp (text, zExtCxx, sizeof (zExtCxx) -1) == 0)
-            return BOOL_TRUE;
-          break;
+      if (!compiled)
+	compile_re (cxxpat, &cxxre, 0, "contents check", "is_cxx_header");
 
-        case 't':
-          /*  Check for "template<" */
-          if (strncmp (text, zTemplate+1, sizeof( zTemplate )-2) != 0)
-            break;
-          text += sizeof( zTemplate )-2;
-          while (isspace( *text ))  text++;
-          if (*text == '<')
-            return BOOL_TRUE;
-          break;
-
-        case NUL:
-          goto text_done;
-          break;
-        }
-    } text_done:;
-
+      if (regexec (&cxxre, text, 0, 0, 0) == 0)
+	return BOOL_TRUE;
+    }
+		   
   return BOOL_FALSE;
 }
 
===================================================================
Index: fixtests.c
--- fixtests.c	2000/01/17 21:45:29	1.8
+++ fixtests.c	2000/01/19 19:26:19
@@ -153,9 +153,9 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
   static regex_t label_re;
 
   char ch;
-  tCC* pz_next = (char*)NULL;
+  tCC* pz_next;
+  tCC* all_text = text;
   regmatch_t match[2];
-  t_bool file_is_cxx = is_cxx_header( fname, text );
 
   /*
      This routine may be run many times within a single execution.
@@ -170,21 +170,15 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
 
   for (;;) /* entire file */
     {
-      /*
-        See if we need to advance to the next candidate directive
-        If the scanning pointer passes over the end of the directive,
-        then the directive is inside a comment */
-      if (pz_next < text)
+      /* Find the next else or endif in the file.  */
+      if (regexec (&label_re, text, 2, match, 0) != 0)
+	break;
+      pz_next = text + match[0].rm_eo;
+
+      /* Scan from where we are up to that position, to make sure
+	 we didn't find something in a string or comment.  */
+      while (pz_next > text)
         {
-          if (regexec (&label_re, text, 2, match, 0) != 0)
-            break;
-          pz_next = text + match[0].rm_eo;
-        }
-
-      /*
-        IF the scan pointer has not reached the directive end, ... */
-      if (pz_next > text)
-        {
           /*
             Advance the scanning pointer.  If we are at the start
             of a quoted string or a comment, then skip the entire unit */
@@ -209,34 +203,23 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
             case '\'':
               text = skip_quote( ch, text );
               break;
-            } /* switch (ch) */
-          continue;
-        } /* if (still shy of directive end) */
+            }
+        }
+      if (pz_next < text)
+	continue;
 
-      /*
-         The scanning pointer (text) has reached the end of the current
-         directive under test, then check for bogons here */
-      for (;;) /* bogon check */
+      /* We're at the end of a real directive.  Check for bogons here.  */
+      for (;;)
         {
           char ch = *(pz_next++);
-          if (isspace (ch))
-            {
-              if (ch == '\n')
-                {
-                  /*
-                    It is clean.  No bogons on this directive */
-                  text = pz_next;
-                  pz_next = (char*)NULL; /* force a new regex search */
-                  break;
-                }
-              continue;
-            }
+	  switch (ch)
+	    {
+	    case '\n':
+	      /* It is clean.  No bogons on this directive.  */
+	      goto next_directive;
 
-          switch (ch)
-            {
             case '\\':
-              /*
-                Skip escaped newlines.  Otherwise, we have a bogon */
+              /* Skip escaped newlines.  Otherwise, we have a bogon.  */
               if (*pz_next != '\n')
                 return APPLY_FIX;
 
@@ -244,46 +227,47 @@ TEST_FOR_FIX_PROC_HEAD( else_endif_label
               break;
 
             case '/':
-              /*
-                Skip comments.  Otherwise, we have a bogon */
-              switch (*pz_next)
-                {
-                case '/':
-                  /* IF we found a "//" in a C header, THEN fix it. */
-                  if (! file_is_cxx)
+              /* Skip comments.  Otherwise, we have a bogon */
+	      switch (*pz_next)
+		{
+		case '/':
+		  /* // in a C header is a bogon.  */
+                  if (! is_cxx_header( fname, all_text ))
                     return APPLY_FIX;
 
-                  /* C++ header.  Skip to newline and continue. */
+                  /* C++ comment is allowed in a C++ header.
+		     Skip to newline and continue. */
                   pz_next = strchr( pz_next+1, '\n' );
                   if (pz_next == (char*)NULL)
                     return SKIP_FIX;
                   pz_next++;
-                  break;
+                  goto next_directive;
 
-                case '*':
+		case '*':
                   /* A comment for either C++ or C.  Skip over it. */
                   pz_next = strstr( pz_next+1, "*/" );
                   if (pz_next == (char*)NULL)
                     return SKIP_FIX;
                   pz_next += 2;
-                  break;
+		  break;
 
-                default:
-                  /* a '/' followed by other junk. */
-                  return APPLY_FIX;
-                }
-              break; /* a C or C++ comment */
+		default:
+		  return APPLY_FIX;
+		}
+	      break;
 
             default:
-              /*
-                GOTTA BE A BOGON */
-              return APPLY_FIX;
+	      if (!isspace (ch))
+		return APPLY_FIX;
             } /* switch (ch) */
         } /* for (bogon check loop) */
+    next_directive:;
+      text = pz_next;
     } /* for (entire file) loop */
 
   return SKIP_FIX;
 }
+
 
 /* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
 
===================================================================
Index: inclhack.def
--- inclhack.def	2000/01/17 21:45:29	1.47
+++ inclhack.def	2000/01/19 19:26:20
@@ -631,6 +631,9 @@ fix = {
     files    = term.h;
     files    = tinfo.h;
 
+    select   = "char[ \t]+bool|bool[ \t]+char";
+    bypass   = "we must use the C\\+\\+ compiler's type";
+
     sed = "/^#[ \t]*define[ \t][ \t]*bool[ \t][ \t]*char[ \t]*$/i\\\n"
                 "#ifndef __cplusplus\n";
 
@@ -749,21 +752,6 @@ fix = {
 
 
 /*
- *  Note that BSD43_* are used on recent MIPS systems.
- */
-fix = {
-    hackname = bsd43_io_macros;
-    select   = "BSD43__IO";
-    /*
-     *  Put single quotes aroung the character that appears after '('
-     *  and before ',', UNLESS it is a 'c' or 'g' or 'x'.
-     */
-    sed = "/[ \t]BSD43__IO[A-Z]*[ \t]*(/"       's/(\(.\),/(\'\1\',/';
-    sed = "/#[ \t]*define[ \t]*[ \t]BSD43__IO/" 's/\'\([cgx]\)\'/\1/g';
-};
-
-
-/*
  *  Fix <c_asm.h> on Digital UNIX V4.0:
  *  It contains a prototype for a DEC C internal asm() function,
  *  clashing with gcc's asm keyword.  So protect this with __DECC.
@@ -970,52 +958,45 @@ fix = {
 };
 
 /*
- *  Fix various _IO* defines, but do *not* quote the characters cgxtf.
+ *  Fix various macros used to define ioctl numbers.  The traditional
+ *  syntax was
+ *  #define _IO(n, x) (('n'<<8)+x)
+ *  #define TIOCFOO _IO(T, 1)
+ *  but this does not work with the C standard, which disallows macro
+ *  expansion inside strings.  We have to rewrite it thus:
+ *  #define _IO(n, x) ((n<<8)+x)
+ *  #define TIOCFOO  _IO('T', 1)
+ *  The select expressions match too much, but the c_fix code is cautious.
+ *
+ *  _IO might be: _IO DESIO BSD43__IO with W, R, WR, C, ... suffixes.
  */
 fix = {
-    hackname = io_def_quotes;
-    select = "[ \t]*[ \t](_|DES)IO[A-Z]*[ \t]*\\( *[^,']";
-    sed = "s/\\([ \t]*[ \t]_IO[A-Z]*[ \t]*(\\)\\([^,']\\),/\\1'\\2',/";
-    sed = "s/\\([ \t]*[ \t]DESIO[A-Z]*[ \t]*(\\)\\([^,']\\),/\\1'\\2',/";
-    sed = "/#[ \t]*define[ \t]*[ \t]_IO/"       "s/'\\([cgxtf]\\)'/\\1/g";
-    sed = "/#[ \t]*define[ \t]*[ \t]DESIOC/"    's/\'\([cdgx]\)\'/\1/g';
+    hackname = io_use_quotes;
+    select = "define[ \t]+[A-Z0-9_]+[ \t]+[A-Z0-9_]+IO[A-Z]*[ \t]*\\( *[^,']";
+    c_fix = IO_use;
 };
-
 
+fix = {
+    hackname = io_def_quotes;
+    select = "define[ \t]+[A-Z0-9_]+IO[A-Z]*\\(([a-zA-Z]).*'\\1'";
+    c_fix = IO_defn;
+};
+ 
+ 
 /*
- *  Fix CTRL macros
- *
- * Basically, what is supposed to be happening is that every
- * _invocation_ of the "_CTRL()" or "CTRL()" macros is supposed to have
- * its argument inserted into single quotes.  We _must_ do this because
- * ANSI macro substitution rules prohibit looking inside quoted strings
- * for the substitution names.  A side effect is that the quotes are
- * inserted in the definitions of those macros as well.  So, the last
- * several sed expressions are supposed to clean up the definitions, as
- * long as those definitions are using "c", "g" or "x" as the macro
- * argument :).  Yuck.
+ *  Same deal for CTRL() macros.
+ *  CTRL might be: CTRL _CTRL ISCTRL BSD43_CTRL ...
  */
 fix = {
-    hackname = ioctl_fix_ctrl;
-    select = "CTRL[ \t]*\\(";
-
-    sed = "/[^A-Z0-9_]CTRL[ \t]*(/"
-              "s/\\([^']\\))/'\\1')/";
-
-    sed = "/[^A-Z0-9]_CTRL[ \t]*(/"
-              "s/\\([^']\\))/'\\1')/";
-
-    sed = "/#[ \t]*define[ \t]*[ \t]CTRL/"
-              "s/'\\([cgx]\\)'/\\1/g";
-
-    sed = "/#[ \t]*define[ \t]*[ \t]_CTRL/"
-              "s/'\\([cgx]\\)'/\\1/g";
-
-    sed = "/#[ \t]*define[ \t]*[ \t]BSD43_CTRL/"
-              "s/'\\([cgx]\\)'/\\1/g";
-
-    sed = "/#[ \t]*define[ \t]*[ \t][_]*ISCTRL/"
-              "s/'\\([cgx]\\)'/\\1/g";
+    hackname = ctrl_use_quotes;
+    select = "define[ \t]+[A-Z0-9_]+[ \t]+[A-Z0-9_]+CTRL[ \t]*\\( *[^,']";
+    c_fix = CTRL_use;
+};
+ 
+fix = {
+    hackname = ctrl_def_quotes;
+    select = "define[ \t]+[A-Z0-9_]+CTRL\\(([a-zA-Z]).*'\\1'";
+    c_fix = CTRL_defn;
 };
 
 
@@ -1372,7 +1353,7 @@ fix = {
     hackname = math_exception;
     files    = math.h;
     select   = "struct exception";
-    bypass   = "We have a problem when using C++";
+    bypass   = "We have a problem when using C\\+\\+";
     sed      = "/struct exception/i\\\n"
                "#ifdef __cplusplus\\\n"
                "#define exception __math_exception\\\n"


More information about the Gcc-patches mailing list