This code gets "caught" at runtime, but I believe that the code is valid. This is on Intel i5, 32-bit Fedora 29, gcc 8.2.1. command line flags are: "-fsanitize=undefined -fsanitize=address -fsanitize=bounds-strict -fstack-protector-all -H -std=gnu17 -O0 -march=native -mfpmath=387 -mieee-fp -fno-builtin -frounding-math -ffloat-store -fexcess-precision=standard -fsignaling-nans" #include <stdio.h> #include <wchar.h> /* wchar_t, wcsto*(), swprintf(), swscanf() */ #include <stddef.h> static FILE *file = NULL; /* points to FILE returned by open() */ static char *filename = NULL; /* points to name of temp file */ typedef struct { float str_fpval; /* FP value returned by wcsto*, and maybe *wscanf */ ptrdiff_t str_numread; /* number of characters accepted by wcsto* */ int wscan_cnt; /* # of chars processed by %f */ int wscan_rc; /* return code from wscanf */ const wchar_t from[20]; /* string to process by *wscanf and wcsto* */ const wchar_t wscan_str[20-8]; /* string return by wscanf's %ls; this or next time */ } data; static data tv[] = { /* 0*/{ 100.f, (ptrdiff_t)3, 3, 1, L"100", L"" } }; int main(void){ int rc; int i = 0; { (void)printf("DEBUG 1\n"); fflush(NULL); } filename = tmpnam(NULL); /* create a temporary file */ { (void)printf("DEBUG 2\n"); fflush(NULL); } (void)printf("temp file name=%s\n", filename); { (void)printf("DEBUG 3\n"); fflush(NULL); } file = fopen(filename,"wb"); /* make sure we can open the file */ { (void)printf("DEBUG 4\n"); fflush(NULL); } file = freopen(NULL, "wb", file); /* so can write */ { (void)printf("DEBUG 5\n"); fflush(NULL); } { (void)printf("str=%ls\n", tv[i].from); fflush(NULL); } { (void)printf("DEBUG 6\n"); fflush(NULL); } { (void)printf("file=%p\n", file); fflush(NULL); } { (void)printf("DEBUG 7\n"); fflush(NULL); } rc = fwprintf( file, L"%ls", tv[i].from ); /* string to process */ { (void)printf("DEBUG 8\n"); fflush(NULL); } return 0; }
Confirmed with -m32.
Apparently i?86 glibc has two fopen entrypoints: 192: 000657d0 35 FUNC GLOBAL DEFAULT 13 fopen@@GLIBC_2.1 193: 0011ec90 144 FUNC GLOBAL DEFAULT 13 fopen@GLIBC_2.0 and libsanitizer intercepts just fopen rather than both and during interception calls the unversioned one returned by dlsym, which is the fopen@GLIBC_2.0. If it wants to intercept fopen on this platform, it needs to probably intercept both and do symbol versioning, or if it doesn't want to bother with the old one at least should make sure to use dlvsym in this case.
The problem looks very similar to: 3537 #if SANITIZER_INTERCEPT_REALPATH 3538 INTERCEPTOR(char *, realpath, const char *path, char *resolved_path) { 3539 void *ctx; 3540 COMMON_INTERCEPTOR_ENTER(ctx, realpath, path, resolved_path); 3541 if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1); 3542 3543 // Workaround a bug in glibc where dlsym(RTLD_NEXT, ...) returns the oldest 3544 // version of a versioned symbol. For realpath(), this gives us something 3545 // (called __old_realpath) that does not handle NULL in the second argument. 3546 // Handle it as part of the interceptor. 3547 char *allocated_path = nullptr; 3548 if (!resolved_path) 3549 allocated_path = resolved_path = (char *)WRAP(malloc)(path_max + 1); 3550 3551 char *res = REAL(realpath)(path, resolved_path); 3552 if (allocated_path && !res) WRAP(free)(allocated_path); 3553 if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1); 3554 return res; 3555 } 3556 #define INIT_REALPATH COMMON_INTERCEPT_FUNCTION(realpath); 3557 #else 3558 #define INIT_REALPATH 3559 #endif
The comment is bogus, that is the symbol versioning design, the only way how to stay ABI compatible even with the binaries that were written before the new symbol version has been introduced. For symbols with more than one symbol version, one needs to use dlvsym, unless the oldest symbol version is desired.
The issue with dlvsym is that one can't request a default version (without knowing ABI version), so e.g. dlvsym(h, "fopen", ""). I believe root problem is this glibc PR I just linked. The problem is default version is not returned. I wrote a script that lists all function names for which interceptor is called (GetRealFunctionAddress) and I compared that to symbols in glibc that have multiple interface versions. And the result is not good: $ readelf --syms --wide /lib64/libc.so.6 | ./check-symver.py ['glob64@@GLIBC_2.27', 'glob64@GLIBC_2.2.5'] ['glob@@GLIBC_2.27', 'glob@GLIBC_2.2.5'] ['sched_getaffinity@@GLIBC_2.3.4', 'sched_getaffinity@GLIBC_2.3.3'] ['realpath@@GLIBC_2.3', 'realpath@GLIBC_2.2.5'] ['fmemopen@@GLIBC_2.22', 'fmemopen@GLIBC_2.2.5'] ['memcpy@@GLIBC_2.14', 'memcpy@GLIBC_2.2.5'] $ readelf --syms --wide /lib/libc.so.6 | ./check-symver.py ['scandir64@@GLIBC_2.2', 'scandir64@GLIBC_2.1'] ['glob64@@GLIBC_2.27', 'glob64@GLIBC_2.1', 'glob64@GLIBC_2.2'] ['getpwent_r@@GLIBC_2.1.2', 'getpwent_r@GLIBC_2.0'] ['getpwuid_r@@GLIBC_2.1.2', 'getpwuid_r@GLIBC_2.0'] ['getgrnam_r@@GLIBC_2.1.2', 'getgrnam_r@GLIBC_2.0'] ['shmctl@@GLIBC_2.2', 'shmctl@GLIBC_2.0'] ['getgrent_r@@GLIBC_2.1.2', 'getgrent_r@GLIBC_2.0'] ['glob@@GLIBC_2.27', 'glob@GLIBC_2.0'] ['__lxstat64@@GLIBC_2.2', '__lxstat64@GLIBC_2.1'] ['fclose@@GLIBC_2.1', 'fclose@GLIBC_2.0'] ['gethostbyaddr_r@@GLIBC_2.1.2', 'gethostbyaddr_r@GLIBC_2.0'] ['realpath@@GLIBC_2.3', 'realpath@GLIBC_2.0'] ['gethostbyname_r@@GLIBC_2.1.2', 'gethostbyname_r@GLIBC_2.0'] ['getpwnam_r@@GLIBC_2.1.2', 'getpwnam_r@GLIBC_2.0'] ['readdir64@@GLIBC_2.2', 'readdir64@GLIBC_2.1'] ['__xstat64@@GLIBC_2.2', '__xstat64@GLIBC_2.1'] ['gethostbyname2_r@@GLIBC_2.1.2', 'gethostbyname2_r@GLIBC_2.0'] ['getgrgid_r@@GLIBC_2.1.2', 'getgrgid_r@GLIBC_2.0'] ['fopen@@GLIBC_2.1', 'fopen@GLIBC_2.0'] ['fmemopen@@GLIBC_2.22', 'fmemopen@GLIBC_2.2'] ['readdir64_r@@GLIBC_2.2', 'readdir64_r@GLIBC_2.1'] ['fopencookie@@GLIBC_2.2', 'fopencookie@GLIBC_2.0'] ['gethostent_r@@GLIBC_2.1.2', 'gethostent_r@GLIBC_2.0'] ['fdopen@@GLIBC_2.1', 'fdopen@GLIBC_2.0'] ['sched_getaffinity@@GLIBC_2.3.4', 'sched_getaffinity@GLIBC_2.3.3']
GCC 8.3 has been released.
GCC 8.4.0 has been released, adjusting target milestone.