This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH]: Proof-of-concept for dynamic format checking


Following up on the discussion about dynamic format checking:
http://gcc.gnu.org/ml/gcc-patches/2005-07/msg00627.html

This patch is a proof-of-concept on the format checking infrastructure
to demonstrate feasibility of dynamic format checking.  This patch is
NOT yet a submission for installation.  I expect to go through some
iterations.  The goal being to have code ready for GCC 4.2 stage1.

I use the code to allocate a new dynamic format checking type called
"__bfd__" which maps to the format used in the BFD library.  AFAICT,
this format takes all the standard printf formats plus two additions,
"%A" with a type "asection*" and "%B" with a type "bfd*".  BFD
accomplishes this by preprocessing the format string and arguments and
substituting the %A or %B with strings before passing the lot to
vfprintf.

My code expects the new format type to be activated like so:

	#pragma GCC format "bfd" "format-checking-data"

Then you can use the format checking type "__bfd__".  Here's a
demonstration program:

	typedef struct asection asection;
	typedef struct bfd bfd;
	#pragma GCC format "bfd" "blah-blah-blah"
	extern void bfd_warn (const char *, ...) __attribute__ ((__format__ (__bfd__, 1, 2)));
	
	void foo (void)
	{
	  asection *a;
	  bfd *b;
	
	  bfd_warn ("%d", 1);
	  bfd_warn ("%d", 1L); /* warn! */
	
	  bfd_warn ("%A", a);
	  bfd_warn ("%A", b); /* warn! */
	  bfd_warn ("%B", a); /* warn! */
	  bfd_warn ("%B", b);
	}

At this point, I don't do any parsing of the "format-checking-data",
this is where I would expect Ian's state machine language to appear.
What I do is simply create the bfd format particulars at runtime and
have the pragma activate it.  The interesting part is that I allocate
this format dynamically, and also that I pull the majority of it from
the existing printf format rather than duplicating that information.
In doing this exercise, several things came to light.

1. I chose to implement a style outside GCC to show that this feature
   is useful outside our compiler sources.  Once it worked, I applied
   the format on 'bfd_error_handler_type' in binutils.  I found almost
   200 format errors/bugs that need fixing.  All of these represent
   binutils or gdb crashes waiting to happen.

2. The format checking code very easily accepts additional formats at
   runtime.  Most of the infrastructure necessary to dynamically
   create format attributes was already in place.  All we really need
   to do conceptually beyond my patch is mesh a state machine format
   style parser onto this code in the pragma handler.  (Hi Ian!)

3. We can easily implement "gimme printf plus this little bit."

4. It became clear that the notion of copying out printf-style could
   lead to confusion as to which C standard printf the user meant.
   I.e. currently printf-style format morphs at GCC runtime depending
   on which --std= flags you pass to the compiler.  I suspect the
   users will usually (always) expect a fixed behavior for their
   format checking.  So I think we'll want to have them be able to say
   specifically they want printf_c90 or printf_gnu99, etc.  The format
   copying routine I wrote allows one to specifiy which things to pull
   out of the printf format by particular C standard.


Here's the rough patch.  Again, posted here only for reference,
comments and future work, not for installation.  So no nitpicking
about docs or ChangeLog please. :-)

		--Kaveh



diff -rup orig/egcc-CVS20050809/gcc/c-common.h egcc-CVS20050809/gcc/c-common.h
--- orig/egcc-CVS20050809/gcc/c-common.h	2005-07-24 18:07:35.000000000 -0400
+++ egcc-CVS20050809/gcc/c-common.h	2005-08-10 23:54:32.000000000 -0400
@@ -936,4 +936,6 @@ extern bool check_missing_format_attribu
 #define GCC_DIAG_STYLE __gcc_cdiag__
 #endif
 
+extern void init_dynamic_bfd_info (void);
+
 #endif /* ! GCC_C_COMMON_H */
diff -rup orig/egcc-CVS20050809/gcc/c-format.c egcc-CVS20050809/gcc/c-format.c
--- orig/egcc-CVS20050809/gcc/c-format.c	2005-07-22 21:34:59.000000000 -0400
+++ egcc-CVS20050809/gcc/c-format.c	2005-08-11 01:29:58.000000000 -0400
@@ -2353,6 +2353,222 @@ find_length_info_modifier_index (const f
   gcc_unreachable ();
 }
 
+/* Copies the entries from printf-style which are less than or equal
+   to the specified C standard STD.  Entries are then modified to work
+   in all C modes.  */
+
+static format_length_info * ATTRIBUTE_MALLOC
+get_printf_length_info (enum format_std_version std)
+{
+  format_length_info *ret;
+  unsigned i, len;
+  
+  /* Calculate number of matching entries.  */
+  for (i=0, len=0; i < ARRAY_SIZE (printf_length_specs); i++)
+    if (printf_length_specs[i].std <= std)
+      len++;
+  
+  ret = XNEWVEC (format_length_info, len);
+
+  /* Copy any matching entries, including the NULL one.  */
+  while (i > 0)
+    if (printf_length_specs[--i].std <= std)
+      {
+	ret[--len] = printf_length_specs[i];
+	ret[len].std = STD_C89;
+      }
+
+  gcc_assert (i==0 && len==0);
+
+#if 0
+  while (ret[i].name)
+  {
+    warning (0, "<%s><%s>",
+	     ret[i].name, ret[i].double_name);
+    i++;
+  }
+#endif
+
+  return ret;
+}
+
+/* Copies the entries from printf-style which are less than or equal
+   to the specified C standard STD.  Entries are then modified to work
+   in all C modes.  */
+
+static format_flag_spec * ATTRIBUTE_MALLOC
+get_printf_flag_spec (enum format_std_version std)
+{
+  format_flag_spec *ret;
+  unsigned i, len;
+  
+  /* Calculate number of matching entries.  */
+  for (i=0, len=0; i < ARRAY_SIZE (printf_flag_specs); i++)
+    if (printf_flag_specs[i].std <= std)
+      len++;
+  
+  ret = XNEWVEC (format_flag_spec, len);
+
+  /* Copy any matching entries, including the NULL one.  */
+  while (i > 0)
+    if (printf_flag_specs[--i].std <= std)
+      {
+	ret[--len] = printf_flag_specs[i];
+	ret[len].std = STD_C89;
+      }
+
+  gcc_assert (i==0 && len==0);
+
+#if 0
+  while (ret[i].flag_char)
+  {
+    warning (0, "<%c><%s>",
+	     ret[i].flag_char, ret[i].name);
+    i++;
+  }
+#endif
+
+  return ret;
+}
+
+/* Copies the entries from printf-style which are less than or equal
+   to the specified C standard STD.  Entries are then modified to work
+   in all C modes.  You can get EXTRA more entries in the table.
+   These will be filled with the variable arguments if EXTRA is
+   greater than zero. */
+
+static format_char_info * ATTRIBUTE_MALLOC
+get_printf_char_info (enum format_std_version std, size_t extra, ...)
+{
+  format_char_info *ret;
+  unsigned i, len;
+  va_list ap;
+  
+  /* Calculate number of matching entries.  */
+  for (i=0, len=0; i < ARRAY_SIZE (print_char_table); i++)
+    if (print_char_table[i].std <= std)
+      len++;
+  
+  ret = XCNEWVEC (format_char_info, len+extra);
+
+  /* Copy the NULL entry.  */
+  ret[--len+extra] = print_char_table[--i];
+
+  /* Copy the EXTRA entries.  */
+  va_start (ap, extra);
+  while (extra > 0)
+    ret[--extra+len] = *(va_arg (ap, format_char_info *));
+  va_end (ap);
+
+  /* Copy any matching entries.  */
+  while (i > 0)
+    if (print_char_table[--i].std <= std)
+      {
+	ret[--len] = print_char_table[i];
+	ret[len].std = STD_C89;
+      }
+
+  gcc_assert (i==0 && len==0);
+
+#if 0
+  while (ret[i].format_chars)
+  {
+    warning (0, "<%s><%s><%s>",
+	     ret[i].format_chars, ret[i].flag_chars, ret[i].flags2);
+    i++;
+  }
+#endif
+
+  return ret;
+}
+
+void
+init_dynamic_bfd_info (void)
+{
+  static bool firsttime = true;
+  static tree asection, bfd;
+  static format_kind_info bfd_fki;
+
+  /* Increase the number of format types by one and insert the "bfd"
+     format type.  */
+  if (firsttime)
+    {
+      /* The "bfd" format_kind_info is just like C89 printf with two
+	 extra flags.  The %A flag takes a "asection *" and the %B
+	 flag takes a "bfd *".  */
+      static const format_char_info fci_asection = 
+	{ "A", 1, STD_C89, { {STD_C89, NULL, &asection}, BADLEN, BADLEN,
+			     BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN },
+	  "", "R", NULL };
+      static const format_char_info fci_bfd = 
+	{ "B", 1, STD_C89, { {STD_C89, NULL, &bfd}, BADLEN, BADLEN, BADLEN,
+			     BADLEN, BADLEN, BADLEN, BADLEN, BADLEN },
+	  "", "R", NULL };
+      bfd_fki = format_types_orig[printf_format_type];
+      bfd_fki.name = "bfd";
+      bfd_fki.length_char_specs = get_printf_length_info (STD_C89);
+      bfd_fki.conversion_specs = get_printf_char_info (STD_C89, 2, &fci_asection, &fci_bfd);
+      bfd_fki.flag_specs = get_printf_flag_spec (STD_C89);
+      
+      n_format_types++;
+      if (dynamic_format_types)
+	dynamic_format_types = XRESIZEVEC (format_kind_info, dynamic_format_types, n_format_types);
+      else
+        {
+	  dynamic_format_types = XNEWVEC (format_kind_info, n_format_types);
+	  memcpy (dynamic_format_types, format_types_orig, sizeof (format_types_orig));
+	}
+      dynamic_format_types[n_format_types-1] = bfd_fki;
+      format_types = dynamic_format_types;
+      firsttime = false;
+    }
+  
+  if (!asection)
+    {
+      if ((asection = maybe_get_identifier ("asection")))
+        {
+	  if (identifier_global_value (asection))
+	    asection = identifier_global_value (asection);
+	  if (asection)
+	    {
+	      if (TREE_CODE (asection) != TYPE_DECL)
+		{
+		  error ("%<asection%> is not defined as a type");
+		  asection = 0;
+		}
+	      else
+		asection = TREE_TYPE (asection);
+	    }
+	  
+	}
+    }
+  if (!bfd)
+    {
+      if ((bfd = maybe_get_identifier ("bfd")))
+        {
+	  if (identifier_global_value (bfd))
+	    bfd = identifier_global_value (bfd);
+	  if (bfd)
+	    {
+	      if (TREE_CODE (bfd) != TYPE_DECL)
+		{
+		  error ("%<bfd%> is not defined as a type");
+		  bfd = 0;
+		}
+	      else
+		bfd = TREE_TYPE (bfd);
+	    }
+	}
+
+    }
+  warning (0, "<%s><%s><%s><%c><%c><%c><%c><%c>",
+	   bfd_fki.name, bfd_fki.flag_chars, bfd_fki.modifier_chars,
+	   bfd_fki.width_char, bfd_fki.left_precision_char,
+	   bfd_fki.precision_char, bfd_fki.suppression_char,
+	   bfd_fki.length_code_char);
+  
+}
+
 /* Determine the type of HOST_WIDE_INT in the code being compiled for
    use in GCC's __asm_fprintf__ custom format attribute.  You must
    have set dynamic_format_types before calling this function.  */
diff -rup orig/egcc-CVS20050809/gcc/c-pragma.c egcc-CVS20050809/gcc/c-pragma.c
--- orig/egcc-CVS20050809/gcc/c-pragma.c	2005-07-19 21:40:44.000000000 -0400
+++ egcc-CVS20050809/gcc/c-pragma.c	2005-08-10 23:53:44.000000000 -0400
@@ -667,6 +667,12 @@ handle_pragma_visibility (cpp_reader *du
 
 #endif
 
+static void
+handle_pragma_format (cpp_reader *dummy ATTRIBUTE_UNUSED)
+{
+  init_dynamic_bfd_info();
+}
+
 /* Front-end wrappers for pragma registration to avoid dragging
    cpplib.h in almost everywhere.  */
 void
@@ -706,6 +712,8 @@ init_pragma (void)
 
   c_register_pragma ("GCC", "pch_preprocess", c_common_pch_pragma);
 
+  c_register_pragma ("GCC", "format", handle_pragma_format);
+
 #ifdef REGISTER_TARGET_PRAGMAS
   REGISTER_TARGET_PRAGMAS ();
 #endif


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]