This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] [Ada] Make middle-end string literals NUL terminated
- From: Bernd Edlinger <bernd dot edlinger at hotmail dot de>
- To: Olivier Hainque <hainque at adacore dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>, Richard Biener <rguenther at suse dot de>, Eric Botcazou <ebotcazou at adacore dot com>, Arnaud Charlet <charlet at adacore dot com>, Jakub Jelinek <jakub at redhat dot com>
- Date: Sat, 4 Aug 2018 11:33:50 +0000
- Subject: Re: [PATCH] [Ada] Make middle-end string literals NUL terminated
- References: <AM5PR0701MB2657F9BAF49A613759D90D13E42E0@AM5PR0701MB2657.eurprd07.prod.outlook.com> <7D840828-B2C9-4C20-BFE9-57249E324ED3@adacore.com>
Hi Olivier,
I think I like your idea a lot, it should be highly useful for Fortran and GO as well.
Would somthing something like this (untested patch) work for you on top of my patch series.
It makes use of the zero-termination properties of STRING_CSTs, that the other patch ensures.
I have two C test cases for this:
$ cat y3.c
const char str[3] = "abc";
$ gcc -fmerge-all-constants -S y3.c
$ cat y3.s
.file "y3.c"
.text
.globl str
.section .rodata.str1.1,"aMS",@progbits,1
.type str, @object
.size str, 3
str:
.string "abc"
.ident "GCC: (GNU) 9.0.0 20180730 (experimental)"
.section .note.GNU-stack,"",@progbits
$ cat y4.c
const char str[4] = "abc";
$ gcc -fmerge-all-constants -S y4.c
$ cat y4.s
$ cat y4.s
.file "y4.c"
.text
.globl str
.section .rodata.str1.1,"aMS",@progbits,1
.type str, @object
.size str, 4
str:
.string "abc"
.ident "GCC: (GNU) 9.0.0 20180730 (experimental)"
.section .note.GNU-stack,"",@progbits
The .size directive uses the DECL_SIZE_UNIT, and is still 3.
I don't know of that needs to change or not.
But what is important that we have .string "abc"
and not .ascii "abc".
What do you think?
Thanks
Bernd.
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c (revision 263072)
+++ gcc/varasm.c (working copy)
@@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre
static void output_constant_def_contents (rtx);
static void output_addressed_constants (tree);
static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT,
- unsigned int, bool);
+ unsigned int, bool,
+ bool = false);
static void globalize_decl (tree);
static bool decl_readonly_section_1 (enum section_category);
#ifdef BSS_SECTION_ASM_OP
@@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS
unit = GET_MODE_SIZE (mode);
/* Check for embedded NUL characters. */
- for (i = 0; i < len; i += unit)
+ for (i = 0; i < len - unit; i += unit)
{
for (j = 0; j < unit; j++)
if (str[i + j] != '\0')
@@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char
static void
assemble_variable_contents (tree decl, const char *name,
- bool dont_output_data)
+ bool dont_output_data, bool merge_strings = false)
{
/* Do any machine/system dependent processing of the object. */
#ifdef ASM_DECLARE_OBJECT_NAME
@@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char
output_constant (DECL_INITIAL (decl),
tree_to_uhwi (DECL_SIZE_UNIT (decl)),
get_variable_align (decl),
- false);
+ false, merge_strings);
else
/* Leave space for it. */
assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl)));
@@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB
switch_to_section (sect);
if (align > BITS_PER_UNIT)
ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
- assemble_variable_contents (decl, name, dont_output_data);
+ assemble_variable_contents (decl, name, dont_output_data,
+ sect->common.flags & SECTION_MERGE
+ && sect->common.flags & SECTION_STRINGS);
if (asan_protected)
{
unsigned HOST_WIDE_INT int size
@@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT
output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool,
oc_outer_state *);
+/* Find out if a string P of length LEN and character size UNIT
+ has double nul termination.
+ The string is already known to have a nul termination at
+ offset LEN. Find out if there is already a NUL termination
+ at offset LEN - UNIT. */
+
+static bool
+redundant_nul_term_p (const char *p, unsigned len, unsigned unit)
+{
+ gcc_assert(len >= unit);
+ return !memcmp (p + len, p + len - unit, unit);
+}
+
/* Output assembler code for constant EXP, with no label.
This includes the pseudo-op such as ".int" or ".byte", and a newline.
Assumes output_addressed_constants has been done on EXP already.
@@ -4812,7 +4828,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT,
static unsigned HOST_WIDE_INT
output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align,
- bool reverse)
+ bool reverse, bool merge_strings /* = false */)
{
enum tree_code code;
unsigned HOST_WIDE_INT thissize;
@@ -4940,8 +4956,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT
case CONSTRUCTOR:
return output_constructor (exp, size, align, reverse, NULL);
case STRING_CST:
- thissize
- = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size);
+ thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp);
+ if (thissize > size && merge_strings
+ && !redundant_nul_term_p (TREE_STRING_POINTER (exp),
+ size, thissize - size))
+ ;
+ else
+ thissize = MIN (thissize, size);
assemble_string (TREE_STRING_POINTER (exp), thissize);
break;
case VECTOR_CST: