This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] [Ada] Make middle-end string literals NUL terminated


Hi Olivier,


I think I like your idea a lot, it should be highly useful for Fortran and GO as well.

Would somthing something like this (untested patch) work for you on top of my patch series.
It makes use of the zero-termination properties of STRING_CSTs, that the other patch ensures.

I have two C test cases for this:

$ cat y3.c
const char str[3] = "abc";
$ gcc -fmerge-all-constants -S y3.c
$ cat y3.s
	.file	"y3.c"
	.text
	.globl	str
	.section	.rodata.str1.1,"aMS",@progbits,1
	.type	str, @object
	.size	str, 3
str:
	.string	"abc"
	.ident	"GCC: (GNU) 9.0.0 20180730 (experimental)"
	.section	.note.GNU-stack,"",@progbits

$ cat y4.c
const char str[4] = "abc";
$ gcc -fmerge-all-constants -S y4.c
$ cat y4.s
$ cat y4.s
	.file	"y4.c"
	.text
	.globl	str
	.section	.rodata.str1.1,"aMS",@progbits,1
	.type	str, @object
	.size	str, 4
str:
	.string	"abc"
	.ident	"GCC: (GNU) 9.0.0 20180730 (experimental)"
	.section	.note.GNU-stack,"",@progbits


The .size directive uses the DECL_SIZE_UNIT, and is still 3.
I don't know of that needs to change or not.
But what is important that we have .string "abc"
and not .ascii "abc".

What do you think?


Thanks
Bernd.
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c	(revision 263072)
+++ gcc/varasm.c	(working copy)
@@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre
 static void output_constant_def_contents (rtx);
 static void output_addressed_constants (tree);
 static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT,
-					       unsigned int, bool);
+					       unsigned int, bool,
+					       bool = false);
 static void globalize_decl (tree);
 static bool decl_readonly_section_1 (enum section_category);
 #ifdef BSS_SECTION_ASM_OP
@@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS
 	  unit = GET_MODE_SIZE (mode);
 
 	  /* Check for embedded NUL characters.  */
-	  for (i = 0; i < len; i += unit)
+	  for (i = 0; i < len - unit; i += unit)
 	    {
 	      for (j = 0; j < unit; j++)
 		if (str[i + j] != '\0')
@@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char
 
 static void
 assemble_variable_contents (tree decl, const char *name,
-			    bool dont_output_data)
+			    bool dont_output_data, bool merge_strings = false)
 {
   /* Do any machine/system dependent processing of the object.  */
 #ifdef ASM_DECLARE_OBJECT_NAME
@@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char
 	output_constant (DECL_INITIAL (decl),
 			 tree_to_uhwi (DECL_SIZE_UNIT (decl)),
 			 get_variable_align (decl),
-			 false);
+			 false, merge_strings);
       else
 	/* Leave space for it.  */
 	assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl)));
@@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB
 	switch_to_section (sect);
       if (align > BITS_PER_UNIT)
 	ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
-      assemble_variable_contents (decl, name, dont_output_data);
+      assemble_variable_contents (decl, name, dont_output_data,
+				  sect->common.flags & SECTION_MERGE
+				  && sect->common.flags & SECTION_STRINGS);
       if (asan_protected)
 	{
 	  unsigned HOST_WIDE_INT int size
@@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT
 output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool,
 		    oc_outer_state *);
 
+/* Find out if a string P of length LEN and character size UNIT
+   has double nul termination.
+   The string is already known to have a nul termination at
+   offset LEN.  Find out if there is already a NUL termination
+   at offset LEN - UNIT.  */
+
+static bool
+redundant_nul_term_p (const char *p, unsigned len, unsigned unit)
+{
+  gcc_assert(len >= unit);
+  return !memcmp (p + len, p + len - unit, unit);
+}
+
 /* Output assembler code for constant EXP, with no label.
    This includes the pseudo-op such as ".int" or ".byte", and a newline.
    Assumes output_addressed_constants has been done on EXP already.
@@ -4812,7 +4828,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT,
 
 static unsigned HOST_WIDE_INT
 output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align,
-		 bool reverse)
+		 bool reverse, bool merge_strings /* = false */)
 {
   enum tree_code code;
   unsigned HOST_WIDE_INT thissize;
@@ -4940,8 +4956,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT
 	case CONSTRUCTOR:
 	  return output_constructor (exp, size, align, reverse, NULL);
 	case STRING_CST:
-	  thissize
-	    = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size);
+	  thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp);
+	  if (thissize > size && merge_strings
+	      && !redundant_nul_term_p (TREE_STRING_POINTER (exp),
+				        size, thissize - size))
+	    ;
+ 	  else
+	    thissize = MIN (thissize, size);
 	  assemble_string (TREE_STRING_POINTER (exp), thissize);
 	  break;
 	case VECTOR_CST:

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]