2018-08-01 Bernd Edlinger * doc/generic.texi (STRING_CST): Update. * varasm.c (check_string_literal): New checking function. (output_constant): Use it. diff -pur gcc/varasm.c gcc/varasm.c --- gcc/varasm.c 2018-07-17 11:19:27.000000000 +0200 +++ gcc/varasm.c 2018-07-31 10:16:12.058827505 +0200 @@ -4774,6 +4774,29 @@ initializer_constant_valid_for_bitfield_ return false; } +/* Return true if STRING, a STRING_CST, fits into a field of SIZE bytes. + STRING must end in a NUL element that is 1, 2 or 4 bytes wide; the only + excess tolerated is that terminating NUL not fitting into the field. */ + +static bool +check_string_literal (tree string, unsigned HOST_WIDE_INT size) +{ + tree eltype = TREE_TYPE (TREE_TYPE (string)); + unsigned HOST_WIDE_INT elts = tree_to_uhwi (TYPE_SIZE_UNIT (eltype)); + const char *p = TREE_STRING_POINTER (string); + int len = TREE_STRING_LENGTH (string); + + if (elts != 1 && elts != 2 && elts != 4) + return false; + if (len <= 0 || len % elts != 0) + return false; + if (size < (unsigned)len && size != len - elts) + return false; + if (memcmp (p + len - elts, "\0\0\0\0", elts) != 0) + return false; + return true; +} + /* output_constructor outer state of relevance in recursive calls, typically for nested aggregate bitfields. */ @@ -4942,6 +4965,7 @@ output_constant (tree exp, unsigned HOST case STRING_CST: thissize = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size); + gcc_checking_assert (check_string_literal (exp, size)); assemble_string (TREE_STRING_POINTER (exp), thissize); break; case VECTOR_CST: Index: gcc/doc/generic.texi =================================================================== --- gcc/doc/generic.texi (revision 263072) +++ gcc/doc/generic.texi (working copy) @@ -1162,9 +1162,13 @@ These nodes represent string-constants. The @code returns the length of the string, as an @code{int}. The @code{TREE_STRING_POINTER} is a @code{char*} containing the string itself. The string may not be @code{NUL}-terminated, and it may contain -embedded @code{NUL} characters. 
Therefore, the -@code{TREE_STRING_LENGTH} includes the trailing @code{NUL} if it is -present. +embedded @code{NUL} characters. However, the +@code{TREE_STRING_LENGTH} always includes a trailing @code{NUL} that +is not part of the language string literal but appended by the front end. +If the string shall not be @code{NUL}-terminated, the @code{TREE_TYPE} +is one character shorter than @code{TREE_STRING_LENGTH}. +Excess characters other than one trailing @code{NUL} character are not +permitted. For wide string constants, the @code{TREE_STRING_LENGTH} is the number of bytes in the string, and the @code{TREE_STRING_POINTER} @@ -1173,6 +1177,11 @@ target system (that is, as integers in the target non-wide string constants are distinguished only by the @code{TREE_TYPE} of the @code{STRING_CST}. +String constants may also be used for other purposes, such as assembler +constraints or attributes. These have no @code{TREE_TYPE} and +need no explicit @code{NUL}-termination. Note there is always +another @code{NUL}-byte beyond @code{TREE_STRING_LENGTH}. + FIXME: The formats of string constants are not well-defined when the target system bytes are not the same width as host system bytes.