Bug 70988 - missing buffer overflow detection in chained strcat calls
Summary: missing buffer overflow detection in chained strcat calls
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 7.0
: P3 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: diagnostic
Depends on:
Blocks:
 
Reported: 2016-05-07 00:12 UTC by Martin Sebor
Modified: 2017-11-19 15:00 UTC (History)
2 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail: 4.5.3, 4.8.3, 4.9.3, 5.3.0, 6.1.0, 7.1.0, 8.0
Last reconfirmed: 2016-05-27 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Martin Sebor 2016-05-07 00:12:14 UTC
In the program below, all of f1 through f4 clearly write past the end of the destination buffer.  However, only f1 and f2 cause a warning about the overflow.  f3 and f4 do not.  Since chaining calls to strcat (or strcpy followed by strcat) is not an uncommon idiom, it might be worth checking for and diagnosing buffer overflow in it at compile time.  I suspect that doing so would also improve the generated code since in the absence of overflow chained calls like 'strcat (strcat (a, "abc"), "def")' would be merged into a single call to __builtin_memcpy.

$ cat t.c && /build/gcc-6.1.0/gcc/xgcc -B/build/gcc-6.1.0/gcc -D_FORTIFY_SOURCE=1 -O2 -Wall -Wextra -Wpedantic t.c
#include <string.h>

#define NOINLINE   __attribute__ ((noclone, noinline))

void NOINLINE f0 (char *s) { __builtin_puts (s); }

void NOINLINE f1 (void)
{
  char a [4];
  strcat (a, "abcdef");
  f0 (a);
}

void NOINLINE f2 (void)
{
  char a [4] = "";
  strcat (a, "abcdef");
  f0 (a);
}

void NOINLINE f3 (void)
{
  char a [4];
  strcat (strcat (a, "abc"), "def");
  f0 (a);
}

void NOINLINE f4 (void)
{
  char a [4] = "";
  strcat (strcat (a, "abc"), "def");
  f0 (a);
}

int main ()
{
  f1 ();
  f2 ();
  f3 ();
  f4 ();
}
In file included from /usr/include/string.h:638:0,
                 from t.c:1:
In function ‘strcat’,
    inlined from ‘f1’ at t.c:10:3:
/usr/include/bits/string3.h:142:10: warning: call to __builtin___strcat_chk will always overflow destination buffer
   return __builtin___strcat_chk (__dest, __src, __bos (__dest));
          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In function ‘strcat’,
    inlined from ‘f2’ at t.c:17:3:
/usr/include/bits/string3.h:142:10: warning: call to __builtin___memcpy_chk will always overflow destination buffer
   return __builtin___strcat_chk (__dest, __src, __bos (__dest));
          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Comment 1 Martin Sebor 2016-05-09 20:43:53 UTC
Furthermore, in cases where GCC does optimize multiple chained strcat calls into calls to __builtin_memcpy (which are then expanded into inline assembly) as in the test case below, it fails to add the instrumentation necessary to detect the buffer overflow.

$ cat xxx.c && /home/msebor/build/gcc-trunk-git/gcc/xgcc -B/home/msebor/build/gcc-trunk-git/gcc -D_FORTIFY_SOURCE=2 -O2 -S -Wall -Wextra -Wpedantic -fdump-tree-optimized=/dev/stdout xxx.c && ./a.out 
#include <string.h>

void  __attribute__ ((noclone, noinline))
f (const char *s)
{
  __builtin_printf ("\"%s\"\n", s);
}

void  __attribute__ ((noclone, noinline))
g (void)
{
  char a [4] = "";
  strcat (a, "abc");
  strcat (a, "def");
  strcat (a, "ghi");
  strcat (a, "jkl");
  f (a);
}

int main ()
{
  g ();
}

;; Function f (f, funcdef_no=24, decl_uid=2214, cgraph_uid=24, symbol_order=24)

__attribute__((noinline, noclone))
f (const char * s)
{
  <bb 2>:
  __builtin_printf ("\"%s\"\n", s_2(D)); [tail call]
  return;

}



;; Function g (g, funcdef_no=25, decl_uid=2217, cgraph_uid=25, symbol_order=25)

__attribute__((noinline, noclone))
g ()
{
  char a[4];

  <bb 2>:
  MEM[(char * {ref-all})&a] = "abc";
  __builtin_memcpy (&MEM[(void *)&a + 3B], "def", 4);
  __builtin_memcpy (&MEM[(void *)&a + 6B], "ghi", 4);
  __builtin_memcpy (&MEM[(void *)&a + 9B], "jkl", 4);
  f (&a);
  a ={v} {CLOBBER};
  return;

}



;; Function main (main, funcdef_no=26, decl_uid=2220, cgraph_uid=26, symbol_order=26) (executed once)

main ()
{
  <bb 2>:
  g ();
  return 0;

}


"�@abcdef"
Comment 2 Jakub Jelinek 2016-05-10 07:43:26 UTC
At least #c1 is indeed a tree-ssa-strlen.c bug.
It transforms
  a = "";
  __builtin___strcat_chk (&a, "abc", 4);
  __builtin___strcat_chk (&a, "def", 4);
  __builtin___strcat_chk (&a, "ghi", 4);
  __builtin___strcat_chk (&a, "jkl", 4);
into:
  a = "";
  __builtin___memcpy_chk (&a, "abc", 4, 4);
  _9 = &a + 3;
  __builtin___memcpy_chk (_9, "def", 4, 4);
  _10 = &a + 6;
  __builtin___memcpy_chk (_10, "ghi", 4, 4);
  _11 = &a + 9;
  __builtin___memcpy_chk (_11, "jkl", 4, 4);
which is wrong, we need to subtract the current known length of the destination string from the __strcat_chk BOS sizes to get the __memcpy_chk BOS size (and both have to be integers), otherwise we have to punt on the transformation (BOS can't be variable).  So the above would be:
  a = "";
  __builtin___memcpy_chk (&a, "abc", 4, 4);
  _9 = &a + 3;
  __builtin___memcpy_chk (_9, "def", 4, 1);
  _10 = &a + 6;
  __builtin___memcpy_chk (_10, "ghi", 4, 0);
  _11 = &a + 9;
  __builtin___memcpy_chk (_11, "jkl", 4, 0);
For known variable length of destination string before __strcat_chk, the question is if we shouldn't transform it to memcpy instead with if (var_len + const_size >= strcat_bos) __chk_fail (); but we'd then might run into overflow issues and from the basic design principle that -D_FORTIFY_SOURCE=2 should provide only very cheap checks.  So maybe use ADD_OVERFLOW too on the addition?
The strcat -> memcpy transformation is important on the other side, would be bad if we lost it.
Comment 3 Martin Sebor 2016-05-27 20:50:12 UTC
Testing a patch.
Comment 4 Martin Sebor 2017-07-13 20:08:55 UTC
Unassigning myself since I'm not actively working on this bug at the moment.