This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Allow -pg without frame pointer on x86


There are some plans to use -pg/mcount tracing even in production
versions of the Linux kernel. That is, there is a special tracer
which is able to nop out the calls when they are not needed. Unfortunately,
right now enabling -pg always requires enabling the frame pointer
too, which has a measurable cost in macro benchmarks.

This patch allows -pg to be used without a frame pointer on x86. The only
change that is needed is to always call mcount before
setting up the local stack frame instead of after. This way the tracer
can always find the return address of the function directly above
its own.  Luckily gcc supported this already; it was just
an #ifdef for some targets. I turned it into a target vector
entry instead.

Also the tracer function has to get the caller information from the 
stack now instead of through the frame pointer. To avoid incompatibility
I use a new entry point "mcount_nofp" in this case.

Unfortunately this change is incompatible (previously the frame pointer
was always forced on functions with mcount calls, and old
glibcs don't have mcount_nofp), so I added a new option,
-mmcount-nofp, to enable this behaviour.

I also got a glibc patch to add mcount_nofp support there.

I know the patch is very late in the development cycle, but
perhaps it could still be considered? By default it doesn't
change any behaviour.

The patch passes bootstrap and the test suite (including its own test case).

-Andi

2008-10-05  Andi Kleen  <ak@linux.intel.com>

	* target.h (struct gcc_target): Add profile_before_prologue.
	* target-def.h (TARGET_FUNCTION_PROFILE): Add.
	(TARGET_INITIALIZER): Add TARGET_FUNCTION_PROFILE.
	* final.c (final_start_function): Check target vector instead
	of PROFILE_BEFORE_PROLOGUE.
	(profile_after_prologue): Likewise.
	* gcc.c (TARGET_PG_OPTION_SPEC): Add.
	(cc1_options): Move pg into TARGET_PG_OPTION_SPEC.
	* config/i386/i386.h (TARGET_PG_OPTION_SPEC): Add.
	* config/i386/i386.c (override_options): Add check for
	-pg && no frame pointer.
	(x86_function_profiler): Emit _nofp mcount suffix when needed.
	(ix86_frame_pointer_required): Add check for ix86_mcount_nofp.
	* config/i386/i386.opt: Add -mmcount-nofp.
	* doc/invoke.texi: Document -mmcount-nofp.
	* testsuite/gcc.dg/tfp.c: Add.

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 141040)
+++ gcc/doc/invoke.texi	(working copy)
@@ -581,7 +581,7 @@
 -momit-leaf-frame-pointer  -mno-red-zone -mno-tls-direct-seg-refs @gol
 -mcmodel=@var{code-model} @gol
 -m32  -m64 -mlarge-data-threshold=@var{num} @gol
--mfused-madd -mno-fused-madd -msse2avx}
+-mfused-madd -mno-fused-madd -msse2avx -mmcount-nofp}
 
 @emph{IA-64 Options}
 @gccoptlist{-mbig-endian  -mlittle-endian  -mgnu-as  -mgnu-ld  -mno-pic @gol
@@ -11219,6 +11219,10 @@
 @opindex msse2avx
 Specify that the assembler should encode SSE instructions with VEX
 prefix.  The option @option{-mavx} turns this on by default.
+
+@item -mmcount-nofp
+Don't force the frame pointer with @option{-pg} function profiling.
+Instead call a new mcount_nofp function.
 @end table
 
 These @samp{-m} switches are supported in addition to the above
Index: gcc/target.h
===================================================================
--- gcc/target.h	(revision 141040)
+++ gcc/target.h	(working copy)
@@ -1088,6 +1088,9 @@
    */
   bool arm_eabi_unwinder;
 
+  /* True when the function profiler code is outputted before the prologue. */
+  bool profile_before_prologue;
+
   /* Leave the boolean fields at the end.  */
 };
 
Index: gcc/final.c
===================================================================
--- gcc/final.c	(revision 141040)
+++ gcc/final.c	(working copy)
@@ -1503,10 +1503,8 @@
 
   /* The Sun386i and perhaps other machines don't work right
      if the profiling code comes after the prologue.  */
-#ifdef PROFILE_BEFORE_PROLOGUE
-  if (crtl->profile)
+  if (targetm.profile_before_prologue && crtl->profile)
     profile_function (file);
-#endif /* PROFILE_BEFORE_PROLOGUE */
 
 #if defined (DWARF2_UNWIND_INFO) && defined (HAVE_prologue)
   if (dwarf2out_do_frame ())
@@ -1548,10 +1546,8 @@
 static void
 profile_after_prologue (FILE *file ATTRIBUTE_UNUSED)
 {
-#ifndef PROFILE_BEFORE_PROLOGUE
-  if (crtl->profile)
+  if (!targetm.profile_before_prologue && crtl->profile)
     profile_function (file);
-#endif /* not PROFILE_BEFORE_PROLOGUE */
 }
 
 static void
Index: gcc/gcc.c
===================================================================
--- gcc/gcc.c	(revision 141040)
+++ gcc/gcc.c	(working copy)
@@ -759,6 +759,12 @@
 # define SYSROOT_HEADERS_SUFFIX_SPEC ""
 #endif
 
+/* Target can override this to allow -pg/-fomit-frame-pointer together */
+#ifndef TARGET_PG_OPTION_SPEC
+#define TARGET_PG_OPTION_SPEC \
+"%{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}}"
+#endif
+
 static const char *asm_debug;
 static const char *cpp_spec = CPP_SPEC;
 static const char *cc1_spec = CC1_SPEC;
@@ -828,8 +834,8 @@
 
 /* NB: This is shared amongst all front-ends.  */
 static const char *cc1_options =
-"%{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}}\
- %1 %{!Q:-quiet} -dumpbase %B %{d*} %{m*} %{a*}\
+ TARGET_PG_OPTION_SPEC
+" %1 %{!Q:-quiet} -dumpbase %B %{d*} %{m*} %{a*}\
  %{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}%{!c:%{!S:-auxbase %b}}\
  %{g*} %{O*} %{W*&pedantic*} %{w} %{std*&ansi&trigraphs}\
  %{v:-version} %{pg:-p} %{p} %{f*} %{undef}\
Index: gcc/testsuite/gcc.dg/tfp.c
===================================================================
--- gcc/testsuite/gcc.dg/tfp.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tfp.c	(revision 0)
@@ -0,0 +1,11 @@
+/* Origin: Andi Kleen */
+/* { dg-do compile } */
+/* { dg-options "-pg -mmcount-nofp" { target { i?86-*-* x86_64-*-* } } } */
+/* { scan-assembler "mcount_nofp" } */
+
+int f(int a)
+{
+	if (a)
+		f2();
+	return 2;
+}
Index: gcc/target-def.h
===================================================================
--- gcc/target-def.h	(revision 141040)
+++ gcc/target-def.h	(working copy)
@@ -824,6 +824,12 @@
     TARGET_OPTION_CAN_INLINE_P,			\
   }
 
+#ifdef PROFILE_BEFORE_PROLOGUE
+#define TARGET_FUNCTION_PROFILE true
+#else
+#define TARGET_FUNCTION_PROFILE false
+#endif
+
 /* The whole shebang.  */
 #define TARGET_INITIALIZER			\
 {						\
@@ -935,7 +941,8 @@
   TARGET_HANDLE_PRAGMA_REDEFINE_EXTNAME,	\
   TARGET_HANDLE_PRAGMA_EXTERN_PREFIX,		\
   TARGET_RELAXED_ORDERING,			\
-  TARGET_ARM_EABI_UNWINDER			\
+  TARGET_ARM_EABI_UNWINDER,			\
+  TARGET_FUNCTION_PROFILE			\
 }
 
 #define TARGET_HANDLE_C_OPTION default_handle_c_option
Index: gcc/config/i386/i386.h
===================================================================
--- gcc/config/i386/i386.h	(revision 141040)
+++ gcc/config/i386/i386.h	(working copy)
@@ -2508,6 +2508,9 @@
 #undef TARG_COND_NOT_TAKEN_BRANCH_COST
 #define TARG_COND_NOT_TAKEN_BRANCH_COST ix86_cost->cond_not_taken_branch_cost
 
+/* Allow -pg with -fomit-frame-pointer */
+#define TARGET_PG_OPTION_SPEC ""
+
 /*
 Local variables:
 version-control: t
Index: gcc/config/i386/i386.opt
===================================================================
--- gcc/config/i386/i386.opt	(revision 141040)
+++ gcc/config/i386/i386.opt	(working copy)
@@ -346,3 +346,7 @@
 msse2avx
 Target Report Var(ix86_sse2avx)
 Encode SSE instructions with VEX prefix
+
+mmcount-nofp
+Target Report Var(ix86_mcount_nofp)
+Support function profiling without frame pointer
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 141040)
+++ gcc/config/i386/i386.c	(working copy)
@@ -3267,6 +3267,9 @@
     target_flags |= MASK_CLD & ~target_flags_explicit;
 #endif
 
+  if (flag_omit_frame_pointer && profile_flag && ix86_mcount_nofp)
+    targetm.profile_before_prologue = true;
+
   /* Save the initial options in case the user does function specific options */
   if (main_args_p)
     target_option_default_node = target_option_current_node
@@ -7131,7 +7134,7 @@
 	  || ix86_current_function_calls_tls_descriptor))
     return 1;
 
-  if (crtl->profile)
+  if (crtl->profile && targetm.profile_before_prologue && !ix86_mcount_nofp)
     return 1;
 
   return 0;
@@ -26114,6 +26117,11 @@
 void
 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
 {
+  const char *add = "";
+
+  if (targetm.profile_before_prologue && ix86_mcount_nofp)
+    add = "_nofp";
+
   if (TARGET_64BIT)
     {
 #ifndef NO_PROFILE_COUNTERS
@@ -26121,9 +26129,9 @@
 #endif
 
       if (DEFAULT_ABI == SYSV_ABI && flag_pic)
-	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
+	fprintf (file, "\tcall\t*%s%s@GOTPCREL(%%rip)\n", MCOUNT_NAME, add);
       else
-	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+	fprintf (file, "\tcall\t%s%s\n", MCOUNT_NAME, add);
     }
   else if (flag_pic)
     {
@@ -26131,7 +26139,7 @@
       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
 #endif
-      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
+      fprintf (file, "\tcall\t*%s%s@GOT(%%ebx)\n", MCOUNT_NAME, add);
     }
   else
     {
@@ -26139,7 +26147,7 @@
       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
 	       PROFILE_COUNT_REGISTER);
 #endif
-      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+      fprintf (file, "\tcall\t%s%s\n", MCOUNT_NAME, add);
     }
 }
 
-- 
ak@linux.intel.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]