This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: libsanitizer merge from upstream r173241


On Mon, Feb 11, 2013 at 12:38:00PM +0100, Jakub Jelinek wrote:
> On Wed, Jan 23, 2013 at 04:24:01PM +0400, Evgeniy Stepanov wrote:
> > > > What if glibc adds a scanf hook (like it has already printf hooks), apps
> > > > could then register their own stuff and the above would then break.  It
> > > > really should be very conservative, and should be checked e.g. with all
> > > > glibc's *scanf tests (e.g. stdio-common/scanf[0-9]*.c,
> > > > stdio-common/tst-sscanf.c).
> > 
> > I'll add support for the missing %specs. About the testing, these
> > files seem like GPL, so I'd prefer not to look at them at all. We've
> > got a smallish test for the scanf implementation in sanitizer_common,
> > but it would be really great to run it on full glibc scanf tests.
> > Would you be willing to setup such testing gnu-side?
> 
> Seems the code in llvm repo looks much better now to me than it used to,
> still it breaks on:

Jakub,
  So there is likely to be at least one more remerge in libsanitizer for the
gcc 4.8 release? I saw that Richard felt that PR56128 justified one. FYI,
I'm interested because it would get us the new allocator support on darwin.
       Jack

> 
> #define _GNU_SOURCE 1
> #include <stdio.h>
> #include <string.h>
> 
> int
> main ()
> {
>   char *p;
>   long double q;
>   if (sscanf ("abcdefghijklmno 1.0", "%ms %Lf", &p, &q) != 2
>       || strcmp (p, "abcdefghijklmno") != 0
>       || q != 1.0L)
>     return 1;
>   return 0;
> }
> 
> because it mishandles %ms.
> 
> Attached patch fixes that and also handles %a when possible.
> There are cases where it is unambiguous, e.g.
> s%Las is s %La s, reading long double, or %ar is %a r, reading
> float, other cases where we can check at least something and keep checking
> the rest of the format string, e.g. for:
> %as
> we know it will either store float, or char * pointer, so just assume
> conservatively the smaller size, and other cases where we have to punt,
> %a[a%d]
> (where % appears in the [...] list).
> 
> I've tested various glibc *scanf testcases with the GCC libsanitizer
> + enabling of # define SANITIZER_INTERCEPT_SCANF SI_NOT_WINDOWS
> + llvm svn diff between last merge point to GCC and current llvm trunk
> + this patch, and it looked good.
> 
> There is one further issue, I'd say you need to pass down to scanf_common
> the result from the intercepted call, and use it to see if it is valid
> to call strlen on the ptr, or if just 1 should be assumed for SSS_STRLEN.
> Note 'n' doesn't count towards that.  Because, it is unsafe to call strlen
> on arguments that haven't been assigned yet.  Testcase that still fails:
> 
> #define _GNU_SOURCE 1
> #include <stdio.h>
> #include <string.h>
> 
> int
> main ()
> {
>   int a, b, c, d;
>   char e[3], f[10];
>   memset (e, ' ', sizeof e);
>   memset (f, ' ', sizeof f);
>   if (sscanf ("1 2 a", "%d%n%n%d %s %s", &a, &b, &c, &d, e, f) != 3
>       || a != 1
>       || b != 1
>       || c != 1
>       || d != 2
>       || strcmp (e, "a") != 0)
>     return 1;
>   return 0;
> }
> 
> Oh, one more thing, on Linux for recent enough glibc's it would be
> desirable to also intercept:
> __isoc99_sscanf, __isoc99_scanf, __isoc99_vsscanf, __isoc99_fscanf,
> __isoc99_vfscanf, __isoc99_vscanf
> functions (guard the interception with
> #if defined __GLIBC_PREREQ && __GLIBC_PREREQ (2, 7)
> ).  All these work exactly like the corresponding non-__isoc99_
> functions, just %a followed by s, S or [ always writes float, thus
> there is no ambiguity.
> 
> 	Jakub

> --- sanitizer_common_interceptors_scanf.inc.jj	2013-02-08 13:21:51.000000000 +0100
> +++ sanitizer_common_interceptors_scanf.inc	2013-02-11 12:15:27.575871424 +0100
> @@ -18,11 +18,12 @@
>  
>  struct ScanfDirective {
>    int argIdx;      // argument index, or -1 of not specified ("%n$")
> -  bool suppressed; // suppress assignment ("*")
>    int fieldWidth;
> +  bool suppressed; // suppress assignment ("*")
>    bool allocate; // allocate space ("m")
>    char lengthModifier[2];
>    char convSpecifier;
> +  bool maybeGnuMalloc;
>  };
>  
>  static const char *parse_number(const char *p, int *out) {
> @@ -119,6 +120,31 @@ static const char *scanf_parse_next(cons
>                    // Consume the closing ']'.
>        ++p;
>      }
> +    // This is unfortunately ambiguous between old GNU extension
> +    // of %as, %aS and %a[...] and newer POSIX %a followed by
> +    // letters s, S or [.
> +    if (dir->convSpecifier == 'a' && !dir->lengthModifier[0]) {
> +      if (*p == 's' || *p == 'S') {
> +	dir->maybeGnuMalloc = true;
> +	++p;
> +      } else if (*p == '[') {
> +	// Watch for %a[h-j%d], if % appears in the
> +	// [...] range, then we need to give up, we don't know
> +	// if scanf will parse it as POSIX %a [h-j %d ] or
> +	// GNU allocation of string with range dh-j plus %.
> +	const char *q = p + 1;
> +	if (*q == '^')
> +	  ++q;
> +	if (*q == ']')
> +	  ++q;
> +	while (*q && *q != ']' && *q != '%')
> +	  ++q;
> +	if (*q == 0 || *q == '%')
> +	  return 0;
> +	p = q + 1; // Consume the closing ']'.
> +	dir->maybeGnuMalloc = true;
> +      }
> +    }
>      break;
>    }
>    return p;
> @@ -131,9 +157,7 @@ static bool scanf_is_integer_conv(char c
>  
>  // Returns true if the character is an floating point conversion specifier.
>  static bool scanf_is_float_conv(char c) {
> -  return char_is_one_of(c, "AeEfFgG");
> -  // NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore,
> -  // unsupported.
> +  return char_is_one_of(c, "aAeEfFgG");
>  }
>  
>  // Returns string output character size for string-like conversions,
> @@ -168,6 +192,21 @@ enum ScanfStoreSize {
>  // Returns the store size of a scanf directive (if >0), or a value of
>  // ScanfStoreSize.
>  static int scanf_get_store_size(ScanfDirective *dir) {
> +  if (dir->allocate) {
> +    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
> +      return SSS_INVALID;
> +    return sizeof(char *);
> +  }
> +
> +  if (dir->maybeGnuMalloc) {
> +    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
> +      return SSS_INVALID;
> +    // This is ambiguous, so check the smaller size of char * (if it is
> +    // a GNU extension of %as, %aS or %a[...]) and float (if it is
> +    // POSIX %a followed by s, S or [ letters).
> +    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
> +  }
> +
>    if (scanf_is_integer_conv(dir->convSpecifier)) {
>      switch (dir->lengthModifier[0]) {
>      case 'h':
> @@ -256,10 +295,6 @@ static void scanf_common(void *ctx, cons
>      }
>      if (dir.suppressed)
>        continue;
> -    if (dir.allocate) {
> -      // Unsupported;
> -      continue;
> -    }
>  
>      int size = scanf_get_store_size(&dir);
>      if (size == SSS_INVALID)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]