This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

dwarf2 basic block start information


hi,

Since the cvs version of gas supports extensions for the dwarf2 basic_block location information, I thought I could try to add support to gcc for this feature. My use of this feature is related to binary code analysis: being able to gather the bb boundaries through gcc's debugging output would save me reverse engineering it from the binary code itself.

The attached code is the start of a patch to do this. It would be really nice to have feedback on:
- the approach chosen
- the bugs which I have stumbled upon.


The patch itself is pretty straightforward. I have simply added an argument to the source_line debug_hook and implemented it (correctly, I think) for the dwarf2 backend. The final.c pass now reads the rtl BASIC_BLOCK note in order to invoke source_line with the appropriate flag. Is this the right approach?

I have lightly tested this patch on the sample C code below on x86 with gcc svn HEAD and binutils cvs HEAD:

#include <stdio.h>
/* Test routine for the basic-block experiment: prints "test" 100 times
   and returns the constant 8.  */
static int foo (void)
{
      /* The condition is the non-zero constant 3, so this branch is always
         taken.  */
      if (3) {
              int i = 0;
              /* The loop body runs for i = 0 through 99.  */
              while (i < 100) {
                      printf ("test\n");
                      i++;
              }
      }
      return 8;
}

/* Entry point: calls foo once, discards its return value, and returns 0.
   argc and argv are not used.  */
int main (int argc, char *argv[])
{
      foo ();
      return 0;
}

While the debugging output looks quite correct at -O0, the -O2 output seems broken:
00000000 <main>:
0: 8d 4c 24 04 lea 0x4(%esp),%ecx
4: 83 e4 f0 and $0xfffffff0,%esp
7: ff 71 fc pushl 0xfffffffc(%ecx)
a: 55 push %ebp
b: 89 e5 mov %esp,%ebp
d: 53 push %ebx
e: 31 db xor %ebx,%ebx
10: 51 push %ecx
11: 83 ec 10 sub $0x10,%esp
14: 8d b6 00 00 00 00 lea 0x0(%esi),%esi
1a: 8d bf 00 00 00 00 lea 0x0(%edi),%edi
20: c7 04 24 00 00 00 00 movl $0x0,(%esp)
27: 43 inc %ebx
28: e8 fc ff ff ff call 29 <main+0x29>
2d: 83 fb 64 cmp $0x64,%ebx
30: 75 ee jne 20 <main+0x20>
32: 83 c4 10 add $0x10,%esp
35: 31 c0 xor %eax,%eax
37: 59 pop %ecx
38: 5b pop %ebx
39: 5d pop %ebp
3a: 8d 61 fc lea 0xfffffffc(%ecx),%esp
3d: c3 ret


With this list of basic block boundaries as reported by the debugging information:
ad: 0x0
ad: 0x11
ad: 0x20
ad: 0x32


Clearly, 0x11 is not a bb boundary so we have a bug. Despite the fact that my understanding of gcc internals is close to nil, it seems to me that this problem is most likely related to some sort of inlining pass which did not update the rtl BASIC_BLOCK note. Thus, the following questions:

1) Is it expected that some rtl optimization passes would bork the BASIC_BLOCK notes?
2) If so, are there known culprits, and would there be interest in patches to try to fix this?
3) Does anyone have an idea of which passes might be the culprits? (It might save a lot of time wandering through the gcc sources.)


If someone is interested in trying out this patch, the released version of readelf on my system seems to be able to dump the basic block dwarf2 instructions with --debug-dump=line. I have also written a small tool that dumps only this information; it is available here: http://cutebugs.net/code/bozo-profiler/

The test binary generated by the top-level Makefile in bin/test/ can be invoked with:
test dw2_bb [BINARY FILE]


[mathieu@mathieu bozo-profiler]$ make
make: Nothing to be done for `all'.
[mathieu@mathieu bozo-profiler]$ ./bin/test/test dw2_bb bin/test/test

regards,
Mathieu
Index: gcc/final.c
===================================================================
--- gcc/final.c	(revision 106485)
+++ gcc/final.c	(working copy)
@@ -129,6 +129,8 @@
 static rtx debug_insn;
 rtx current_output_insn;
 
+int current_start_basic_block = 0;
+
 /* Line number of last NOTE.  */
 static int last_linenum;
 
@@ -1744,6 +1746,7 @@
 	  else
 	    *seen |= SEEN_BB;
 
+	  current_start_basic_block = 1;
 	  break;
 
 	case NOTE_INSN_EH_REGION_BEG:
@@ -2071,8 +2074,21 @@
 	   note in a row.  */
 	if (notice_source_line (insn))
 	  {
-	    (*debug_hooks->source_line) (last_linenum, last_filename);
+	    if (current_start_basic_block)
+	      {
+		current_start_basic_block = 0;
+		(*debug_hooks->source_line) (last_linenum, last_filename, LINE_FLAG_BASIC_BLOCK);
+	      }
+	    else 
+	      {
+		(*debug_hooks->source_line) (last_linenum, last_filename, 0);
+	      }
 	  }
+	else if (current_start_basic_block)
+	  {
+	    current_start_basic_block = 0;
+	    (*debug_hooks->source_line) (last_linenum, last_filename, LINE_FLAG_BASIC_BLOCK);
+	  }
 
 	if (GET_CODE (body) == ASM_INPUT)
 	  {
@@ -2498,6 +2514,7 @@
 	current_output_insn = debug_insn = 0;
       }
     }
+
   return NEXT_INSN (insn);
 }
 
Index: gcc/debug.c
===================================================================
--- gcc/debug.c	(revision 106485)
+++ gcc/debug.c	(working copy)
@@ -33,7 +33,7 @@
   debug_nothing_int_int,	         /* begin_block */
   debug_nothing_int_int,	         /* end_block */
   debug_true_tree,		         /* ignore_block */
-  debug_nothing_int_charstar,	         /* source_line */
+  debug_nothing_int_charstar_int,	 /* source_line */
   debug_nothing_int_charstar,	         /* begin_prologue */
   debug_nothing_int_charstar,	         /* end_prologue */
   debug_nothing_int_charstar,	         /* end_epilogue */
@@ -94,6 +94,13 @@
 }
 
 void
+debug_nothing_int_charstar_int (unsigned int line ATTRIBUTE_UNUSED,
+				const char *text ATTRIBUTE_UNUSED,
+				unsigned int flags ATTRIBUTE_UNUSED)
+{
+}
+
+void
 debug_nothing_int (unsigned int line ATTRIBUTE_UNUSED)
 {
 }
Index: gcc/debug.h
===================================================================
--- gcc/debug.h	(revision 106485)
+++ gcc/debug.h	(working copy)
@@ -59,7 +59,7 @@
   bool (* ignore_block) (tree);
 
   /* Record a source file location at (FILE, LINE).  */
-  void (* source_line) (unsigned int line, const char *file);
+  void (* source_line) (unsigned int line, const char *file, unsigned int flags);
 
   /* Called at start of prologue code.  LINE is the first line in the
      function.  This has been given the same prototype as source_line,
@@ -129,12 +129,16 @@
   int start_end_main_source_file;
 };
 
+
+#define LINE_FLAG_BASIC_BLOCK ((unsigned int)1)
+
 extern const struct gcc_debug_hooks *debug_hooks;
 
 /* The do-nothing hooks.  */
 extern void debug_nothing_void (void);
 extern void debug_nothing_charstar (const char *);
 extern void debug_nothing_int_charstar (unsigned int, const char *);
+extern void debug_nothing_int_charstar_int (unsigned int, const char *, unsigned int flags);
 extern void debug_nothing_int (unsigned int);
 extern void debug_nothing_int_int (unsigned int, unsigned int);
 extern void debug_nothing_tree (tree);
Index: gcc/dwarf2out.c
===================================================================
--- gcc/dwarf2out.c	(revision 106485)
+++ gcc/dwarf2out.c	(working copy)
@@ -69,7 +69,7 @@
 #include "input.h"
 
 #ifdef DWARF2_DEBUGGING_INFO
-static void dwarf2out_source_line (unsigned int, const char *);
+static void dwarf2out_source_line (unsigned int, const char *, unsigned int flags);
 #endif
 
 /* DWARF2 Abbreviation Glossary:
@@ -2510,7 +2510,7 @@
      prologue case, not the eh frame case.  */
 #ifdef DWARF2_DEBUGGING_INFO
   if (file)
-    dwarf2out_source_line (line, file);
+    dwarf2out_source_line (line, file, 0);
 #endif
 }
 
@@ -13534,7 +13534,7 @@
    'line_info_table' for later output of the .debug_line section.  */
 
 static void
-dwarf2out_source_line (unsigned int line, const char *filename)
+dwarf2out_source_line (unsigned int line, const char *filename, unsigned int flags)
 {
   if (debug_info_level >= DINFO_LEVEL_NORMAL
       && line != 0)
@@ -13553,7 +13553,14 @@
 	  file_num = maybe_emit_file (file_num);
 
 	  /* Emit the .loc directive understood by GNU as.  */
-	  fprintf (asm_out_file, "\t.loc %d %d 0\n", file_num, line);
+	  if (flags & LINE_FLAG_BASIC_BLOCK) 
+	    {
+	      fprintf (asm_out_file, "\t.loc %d %d 0 basic_block\n", file_num, line);
+	    }
+	  else
+	    {
+	      fprintf (asm_out_file, "\t.loc %d %d 0 ;#test\n", file_num, line);
+	    }
 
 	  /* Indicate that line number info exists.  */
 	  line_info_table_in_use++;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]