This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 0/2] Levenshtein-based suggestions (v3)


Probably coming too late, sorry.

On Thu, Nov 12, 2015 at 09:08:36PM -0500, David Malcolm wrote:
> index 4335a87..eb4e1fc 100644
> --- a/gcc/c/c-typeck.c
> +++ b/gcc/c/c-typeck.c
> @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "c-family/c-ubsan.h"
>  #include "cilk.h"
>  #include "gomp-constants.h"
> +#include "spellcheck.h"
>  
>  /* Possible cases of implicit bad conversions.  Used to select
>     diagnostic messages in convert_for_assignment.  */
> @@ -2242,6 +2243,72 @@ lookup_field (tree type, tree component)
>    return tree_cons (NULL_TREE, field, NULL_TREE);
>  }
>  
> +/* Recursively append candidate IDENTIFIER_NODEs to CANDIDATES.  */
> +
> +static void
> +lookup_field_fuzzy_find_candidates (tree type, tree component,
> +				    vec<tree> *candidates)
> +{
> +  tree field;
> +  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))

I'd prefer declaring field in the for loop, so
  for (tree field = TYPE_FIELDS...

> +	  && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE
> +	      || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE))

This is RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)).

> +	{
> +	  lookup_field_fuzzy_find_candidates (TREE_TYPE (field),
> +					      component,
> +					      candidates);
> +	}

Lose the brackets around a single statement.

> +      if (DECL_NAME (field))
> +	candidates->safe_push (DECL_NAME (field));
> +    }
> +}
> +
> +/* Like "lookup_field", but find the closest matching IDENTIFIER_NODE,
> +   rather than returning a TREE_LIST for an exact match.  */
> +
> +static tree
> +lookup_field_fuzzy (tree type, tree component)
> +{
> +  gcc_assert (TREE_CODE (component) == IDENTIFIER_NODE);
> +
> +  /* First, gather a list of candidates.  */
> +  auto_vec <tree> candidates;
> +
> +  lookup_field_fuzzy_find_candidates (type, component,
> +				      &candidates);
> +
> +  /* Now determine which is closest.  */
> +  int i;
> +  tree identifier;
> +  tree best_identifier = NULL;

NULL_TREE

> +  edit_distance_t best_distance = MAX_EDIT_DISTANCE;
> +  FOR_EACH_VEC_ELT (candidates, i, identifier)
> +    {
> +      gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE);
> +      edit_distance_t dist = levenshtein_distance (component, identifier);
> +      if (dist < best_distance)
> +	{
> +	  best_distance = dist;
> +	  best_identifier = identifier;
> +	}
> +    }
> +
> +  /* If more than half of the letters were misspelled, the suggestion is
> +     likely to be meaningless.  */
> +  if (best_identifier)
> +    {
> +      unsigned int cutoff = MAX (IDENTIFIER_LENGTH (component),
> +				 IDENTIFIER_LENGTH (best_identifier)) / 2;
> +      if (best_distance > cutoff)
> +	return NULL;

NULL_TREE

> +/* The Levenshtein distance is an "edit-distance": the minimal
> +   number of one-character insertions, removals or substitutions
> +   that are needed to change one string into another.
> +
> +   This implementation uses the Wagner-Fischer algorithm.  */
> +
> +static edit_distance_t
> +levenshtein_distance (const char *s, int len_s,
> +		      const char *t, int len_t)
> +{
> +  const bool debug = false;
> +
> +  if (debug)
> +    {
> +      printf ("s: \"%s\" (len_s=%i)\n", s, len_s);
> +      printf ("t: \"%s\" (len_t=%i)\n", t, len_t);
> +    }

Did you leave this debug stuff here intentionally?

> +      /* Build the rest of the row by considering neighbours to
> +	 the north, west and northwest.  */
> +      for (int j = 0; j < len_s; j++)
> +	{
> +	  edit_distance_t cost = (s[j] == t[i] ? 0 : 1);
> +	  edit_distance_t deletion     = v1[j] + 1;
> +	  edit_distance_t insertion    = v0[j + 1] + 1;

The formatting doesn't look right here.

	Marek


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]