Fix to switch decoder

Neil Booth neil@daikokuya.co.uk
Sun Jun 22 13:42:00 GMT 2003


There was a subtle bug in the switch decoder that has only recently
been possible, owing to the variety of switches that are now handled.

Suppose the list of switches contains "-g" and "-gen-decls" in that
order, as it does now.

A command line switch of "-gstabs" should match "-g" with argument
"stabs".  However, if the binary search happens to land on "-gen-decls"
without seeing "-g", then "-gen-decls" becomes the lower bound of the
binary search window as "-gen-decls" < "-gstabs", and "-g" is never
seen.  This causes "-gstabs" to be reported as an unknown switch.  This
has never happened until now since switches taking joined arguments
have not been sufficiently short to cause such ambiguities.  -ffixed-X
is another switch that had this problem, because of the existence of
-ffixed-form and -ffixed-length-.

This patch cures that whilst retaining the efficiency of a binary
search; each lookup requires between C + 1 and C + 3 strcmps, where
C = (int) log2 (N) where N is the number of switches.  It even
simplifies the lookup code slightly.

Tested with mawk and gawk, and bootstrapped x86 NetBSD for C, C++,
ObjC and F77.

Neil.

	* opts.c (find_opt): Fix to always guarantee a find of a
	switch with joined parameter.
	* opts.h (struct cl_option): New member back_chain.
	* opts.sh: Update to calculate and add back_chain member.

Index: opts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/opts.c,v
retrieving revision 1.11
diff -u -p -r1.11 opts.c
--- opts.c	21 Jun 2003 20:28:16 -0000	1.11
+++ opts.c	22 Jun 2003 08:59:54 -0000
@@ -131,93 +131,87 @@ static void handle_param (const char *);
 static void set_Wextra (int);
 
 /* Perform a binary search to find which option the command-line INPUT
-   matches.  Returns its index in the option array, and N_OPTS on
-   failure.
+   matches.  Returns its index in the option array, and N_OPTS
+   (cl_options_count) on failure.
 
-   Complications arise since some options can be suffixed with an
-   argument, and multiple complete matches can occur, e.g. -pedantic
-   and -pedantic-errors.  Also, some options are only accepted by some
-   languages.  If a switch matches for a different language and
-   doesn't match any alternatives for the true front end, the index of
-   the matched switch is returned anyway.  The caller should check for
-   this case.  */
+   This routine is quite subtle.  A normal binary search is not good
+   enough because some options can be suffixed with an argument, and
+   multiple sub-matches can occur, e.g. input of "-pedantic" matching
+   the initial substring of "-pedantic-errors".
+
+   A more complicated example is -gstabs.  It should match "-g" with
+   an argument of "stabs".  Suppose, however, that the number and list
+   of switches are such that the binary search tests "-gen-decls"
+   before having tested "-g".  This doesn't match, and as "-gen-decls"
+   is less than "-gstabs", it will become the lower bound of the
+   binary search range, and "-g" will never be seen.  To resolve this
+   issue, opts.sh makes "-gen-decls" point, via the back_chain member,
+   to "-g" so that failed searches that end between "-gen-decls" and
+   the lexicographically subsequent switch know to go back and see if
+   "-g" causes a match (which it does in this example).
+
+   This search is done in such a way that the longest match for the
+   front end in question wins.  If there is no match for the current
+   front end, the longest match for a different front end is returned
+   (or N_OPTS if none) and the caller emits an error message.  */
 static size_t
 find_opt (const char *input, int lang_mask)
 {
-  size_t md, mn, mx;
-  size_t opt_len;
-  size_t result = cl_options_count;
+  size_t mn, mx, md, opt_len;
+  size_t match_wrong_lang;
   int comp;
 
   mn = 0;
   mx = cl_options_count;
 
-  while (mx > mn)
+  /* Find mn such this lexicographical inequality holds:
+     cl_options[mn] <= input < cl_options[mn + 1].  */
+  while (mx - mn > 1)
     {
       md = (mn + mx) / 2;
-
       opt_len = cl_options[md].opt_len;
       comp = strncmp (input, cl_options[md].opt_text, opt_len);
 
       if (comp < 0)
 	mx = md;
-      else if (comp > 0)
-	mn = md + 1;
       else
-	{
-	  /* The switch matches.  It it an exact match?  */
-	  if (input[opt_len] == '\0')
-	    return md;
-	  else
-	    {
-	      mn = md + 1;
-
-	      /* If the switch takes no arguments this is not a proper
-		 match, so we continue the search (e.g. input="stdc++"
-		 match was "stdc").  */
-	      if (!(cl_options[md].flags & CL_JOINED))
-		continue;
-
-	      /* Is this switch valid for this front end?  */
-	      if (!(cl_options[md].flags & lang_mask))
-		{
-		  /* If subsequently we don't find a better match,
-		     return this and let the caller report it as a bad
-		     match.  */
-		  result = md;
-		  continue;
-		}
-
-	      /* Two scenarios remain: we have the switch's argument,
-		 or we match a longer option.  This can happen with
-		 -iwithprefix and -withprefixbefore.  The longest
-		 possible option match succeeds.
-
-		 Scan forwards, and return an exact match.  Otherwise
-		 return the longest valid option-accepting match (mx).
-		 This loops at most twice with current options.  */
-	      mx = md;
-	      for (md = md + 1; md < cl_options_count; md++)
-		{
-		  opt_len = cl_options[md].opt_len;
-		  comp = strncmp (input, cl_options[md].opt_text, opt_len);
-		  if (comp < 0)
-		    break;
-		  if (comp > 0)
-		    continue;
-		  if (input[opt_len] == '\0')
-		    return md;
-		  if (cl_options[md].flags & lang_mask
-		      && cl_options[md].flags & CL_JOINED)
-		    mx = md;
-		}
+	mn = md;
+    }
 
-	      return mx;
-	    }
+  /* This is the switch that is the best match but for a different
+     front end, or cl_options_count if there is no match at all.  */
+  match_wrong_lang = cl_options_count;
+
+  /* Backtrace the chain of possible matches, returning the longest
+     one, if any, that fits best.  With current GCC switches, this
+     loop executes at most twice.  */
+  do
+    {
+      const struct cl_option *opt = &cl_options[mn];
+
+      /* Is this switch a prefix of the input?  */
+      if (!strncmp (input, opt->opt_text, opt->opt_len))
+	{
+	  /* If language is OK, and the match is exact or the switch
+	     takes a joined argument, return it.  */
+	  if ((opt->flags & lang_mask)
+	      && (input[opt->opt_len] == '\0' || (opt->flags & CL_JOINED)))
+	    return mn;
+
+	  /* If we haven't remembered a prior match, remember this
+	     one.  Any prior match is necessarily better.  */
+	  if (match_wrong_lang != cl_options_count)
+	    match_wrong_lang = mn;
 	}
+
+      /* Try the next possibility.  This is cl_options_count if there
+	 are no more.  */
+      mn = opt->back_chain;
     }
+  while (mn != cl_options_count);
 
-  return result;
+  /* Return the best wrong match, or cl_options_count if none.  */
+  return match_wrong_lang;
 }
 
 /* If ARG is a non-negative integer made up solely of digits, return its
Index: opts.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/opts.h,v
retrieving revision 1.5
diff -u -p -r1.5 opts.h
--- opts.h	16 Jun 2003 05:47:04 -0000	1.5
+++ opts.h	22 Jun 2003 08:59:54 -0000
@@ -26,6 +26,7 @@ extern int handle_option (int argc, char
 struct cl_option
 {
   const char *opt_text;
+  unsigned short back_chain;
   unsigned char opt_len;
   unsigned int flags;
 };
Index: opts.sh
===================================================================
RCS file: /cvs/gcc/gcc/gcc/opts.sh,v
retrieving revision 1.16
diff -u -p -r1.16 opts.sh
--- opts.sh	20 Jun 2003 20:07:51 -0000	1.16
+++ opts.sh	22 Jun 2003 08:59:54 -0000
@@ -100,21 +100,43 @@ ${AWK} '
 	print "const unsigned int cl_options_count = N_OPTS;\n" >> c_file
 	print "const struct cl_option cl_options[] =\n{" >> c_file
 
+	for (i = 0; i < n_opts; i++)
+	    back_chain[i] = "N_OPTS";
+
 	for (i = 0; i < n_opts; i++) {
+	    # Combine the flags of identical switches.  Switches
+	    # appear many times if they are handled by many front
+	    # ends, for example.
 	    while( i + 1 != n_opts && opts[i] == opts[i + 1] ) {
 		flags[i + 1] = flags[i] " " flags[i + 1];
 		i++;
 	    }
 
+	    len = length (opts[i]);
 	    enum = "OPT_" opts[i]
-	    gsub( "[^A-Za-z0-9]", "_", enum)
+	    gsub ("[^A-Za-z0-9]", "_", enum)
+
+	    # If this switch takes joined arguments, back-chain all
+	    # subsequent switches to it for which it is a prefix.  If
+	    # a later switch S is a longer prefix of a switch T, T
+	    # will be back-chained to S in a later iteration of this
+	    # for() loop, which is what we want.
+	    if (flags[i] ~ "Joined") {
+		for (j = i + 1; j < n_opts; j++) {
+		    if (substr (opts[j], 1, len) != opts[i])
+			break;
+		    back_chain[j] = enum;
+		}
+	    }
+
 	    s = substr("                                  ", length (opts[i]))
 	    if (i + 1 == n_opts)
 		comma = ""
 
 	    printf("  %s,%s/* -%s */\n", enum, s, opts[i]) >> h_file
-	    printf("  { \"%s\", %u, %s }%s\n", opts[i], \
-		length(opts[i]), switch_flags(flags[i]), comma) >> c_file
+	    printf("  { \"%s\", (unsigned short) %s, %u,\n\t%s }%s\n",
+		   opts[i], back_chain[i], len, switch_flags(flags[i]),
+		   comma) >> c_file
 	}
 
 	print "  N_OPTS\n};"				>> h_file



More information about the Gcc-patches mailing list