This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PING] [PATCH] Add direct support for Linux kernel __fentry__ patching


Andi Kleen <andi@firstfloor.org> writes:

Ping!

> Andi Kleen <andi@firstfloor.org> writes:
>
> Ping!
>
>> From: Andi Kleen <ak@linux.intel.com>
>>
>> The Linux kernel dynamically patches __fentry__ calls in and
>> out at runtime. This allows using function tracing for debugging
>> in production kernels without (significant) performance penalty.
>>
>> For this it needs a table pointing to each __fentry__ call.
>>
>> The way it is currently implemented is that a special
>> perl script scans the object file and generates the table in a special
>> section. When the kernel boots up it nops the calls, and
>> then later patches in the calls again as needed.
>>
>> The recordmcount.pl script in the kernel works, but it seems
>> cleaner and faster to support the code generation of the patch table
>> directly in gcc.
>>
>> This also allows nopping the calls directly at code generation
>> time, which makes it possible to skip a patching step at kernel boot.
>> I expect that a patchable production tracing facility is also useful
>> for other applications.
>>
>> For example it could be used in ftracer
>> (https://github.com/andikleen/ftracer)
>>
>> Having a nop area at the beginning of each function can also be
>> useful for other things. For example it can be used to patch
>> functions at runtime to point to different functions, to do
>> binary updates without restarting the program (like ksplice or
>> similar)
>>
>> This patch implements two new options for the i386 target:
>>
>> -mrecord-mcount
>> Generate a __mcount_loc section entry for each __fentry__ or mcount
>> call. The section is compatible with the kernel convention
>> and the data is put into a section loaded at runtime.
>>
>> -mnop-mcount
>> Generate the mcount/__fentry__ call as a 5 byte nop that can be
>> patched in later. The nop is generated as a single instruction,
>> as the Linux kernel run time patching relies on this.
>>
>> Limitations:
>> - I didn't implement -mnop-mcount for -fPIC. This
>> would need a good single instruction 6 byte NOP and it seems a
>> bit pointless, as the patching would prevent text sharing.
>> - I didn't implement nopping for targets that pass a variable
>> to mcount.
>> - The facility could be useful on other architectures too. Currently
>> the mcount code is target specific, so I made it an i386 option.
>>
>> Passes bootstrap and testing on x86_64-linux.
>>
>> Cc: rostedt@goodmis.org
>>
>> gcc/:
>>
>> 2014-09-01  Andi Kleen  <ak@linux.intel.com>
>>
>> 	* config/i386/i386.c (x86_print_call_or_nop): New function.
>> 	(x86_function_profiler): Support -mnop-mcount and
>> 	-mrecord-mcount.
>> 	* config/i386/i386.opt (-mnop-mcount, -mrecord-mcount): Add
>> 	* doc/invoke.texi: Document -mnop-mcount, -mrecord-mcount
>> 	* testsuite/gcc/gcc.target/i386/nop-mcount.c: New file.
>> 	* testsuite/gcc/gcc.target/i386/record-mcount.c: New file.
>> ---
>>  gcc/config/i386/i386.c                        | 34 +++++++++++++++++++++++----
>>  gcc/config/i386/i386.opt                      |  9 +++++++
>>  gcc/doc/invoke.texi                           | 17 +++++++++++++-
>>  gcc/testsuite/gcc.target/i386/nop-mcount.c    | 24 +++++++++++++++++++
>>  gcc/testsuite/gcc.target/i386/record-mcount.c | 24 +++++++++++++++++++
>>  5 files changed, 102 insertions(+), 6 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/i386/nop-mcount.c
>>  create mode 100644 gcc/testsuite/gcc.target/i386/record-mcount.c
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 61b33782..a651aa1 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -3974,6 +3974,13 @@ ix86_option_override_internal (bool main_args_p,
>>  	}
>>      }
>>  
>> +#ifndef NO_PROFILE_COUNTERS
>> +  if (flag_nop_mcount)
>> +    error ("-mnop-mcount is not compatible with this target");
>> +#endif
>> +  if (flag_nop_mcount && flag_pic)
>> +    error ("-mnop-mcount is not implemented for -fPIC");
>> +
>>    /* Accept -msseregparm only if at least SSE support is enabled.  */
>>    if (TARGET_SSEREGPARM_P (opts->x_target_flags)
>>        && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
>> @@ -39042,6 +39049,17 @@ x86_field_alignment (tree field, int computed)
>>    return computed;
>>  }
>>  
>> +/* Print call to TARGET to FILE.  */
>> +
>> +static void
>> +x86_print_call_or_nop (FILE *file, const char *target)
>> +{
>> +  if (flag_nop_mcount)
>> +    fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop.  */
>> +  else
>> +    fprintf (file, "1:\tcall\t%s\n", target);
>> +}
>> +
>>  /* Output assembler code to FILE to increment profiler label # LABELNO
>>     for profiling a function entry.  */
>>  void
>> @@ -39049,7 +39067,6 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
>>  {
>>    const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
>>  					 : MCOUNT_NAME);
>> -
>>    if (TARGET_64BIT)
>>      {
>>  #ifndef NO_PROFILE_COUNTERS
>> @@ -39057,9 +39074,9 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
>>  #endif
>>  
>>        if (!TARGET_PECOFF && flag_pic)
>> -	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
>> +	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
>>        else
>> -	fprintf (file, "\tcall\t%s\n", mcount_name);
>> +	x86_print_call_or_nop (file, mcount_name);
>>      }
>>    else if (flag_pic)
>>      {
>> @@ -39067,7 +39084,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
>>        fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
>>  	       LPREFIX, labelno);
>>  #endif
>> -      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
>> +      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
>>      }
>>    else
>>      {
>> @@ -39075,7 +39092,14 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
>>        fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
>>  	       LPREFIX, labelno);
>>  #endif
>> -      fprintf (file, "\tcall\t%s\n", mcount_name);
>> +      x86_print_call_or_nop (file, mcount_name);
>> +    }
>> +
>> +  if (flag_record_mcount)
>> +    {
>> +      fprintf (file, "\t.section __mcount_loc, \"r\"\n");
>> +      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
>> +      fprintf (file, "\t.previous\n");
>>      }
>>  }
>>  
>> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
>> index 9208b76..acf6b37 100644
>> --- a/gcc/config/i386/i386.opt
>> +++ b/gcc/config/i386/i386.opt
>> @@ -789,6 +789,15 @@ mfentry
>>  Target Report Var(flag_fentry) Init(-1)
>>  Emit profiling counter call at function entry before prologue.
>>  
>> +mrecord-mcount
>> +Target Report Var(flag_record_mcount) Init(0)
>> +Generate __mcount_loc section with all mcount or __fentry__ calls.
>> +
>> +mnop-mcount
>> +Target Report Var(flag_nop_mcount) Init(0)
>> +Generate mcount/__fentry__ calls as nops. To activate they need to be
>> +patched in.
>> +
>>  m8bit-idiv
>>  Target Report Mask(USE_8BIT_IDIV) Save
>>  Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index d15d4a9..43fd6b7 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -691,7 +691,7 @@ Objective-C and Objective-C++ Dialects}.
>>  -momit-leaf-frame-pointer  -mno-red-zone -mno-tls-direct-seg-refs @gol
>>  -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol
>>  -m32 -m64 -mx32 -m16 -mlarge-data-threshold=@var{num} @gol
>> --msse2avx -mfentry -m8bit-idiv @gol
>> +-msse2avx -mfentry -mrecord-mcount -mnop-mcount -m8bit-idiv @gol
>>  -mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol
>>  -mstack-protector-guard=@var{guard}}
>>  
>> @@ -15954,6 +15954,21 @@ counter call before the prologue.
>>  Note: On x86 architectures the attribute @code{ms_hook_prologue}
>>  isn't possible at the moment for @option{-mfentry} and @option{-pg}.
>>  
>> +@item -mrecord-mcount
>> +@itemx -mno-record-mcount
>> +@opindex mrecord-mcount
>> +If profiling is active (@option{-pg}), generate a __mcount_loc section
>> +that contains pointers to each profiling call. This is useful for
>> +automatically patching calls in and out.
>> +
>> +@item -mnop-mcount
>> +@itemx -mno-nop-mcount
>> +@opindex mnop-mcount
>> +If profiling is active (@option{-pg}), generate the calls to
>> +the profiling functions as nops. This is useful when they
>> +should be patched in later dynamically. This is likely only
>> +useful together with @option{-mrecord-mcount}.
>> +
>>  @item -m8bit-idiv
>>  @itemx -mno-8bit-idiv
>>  @opindex 8bit-idiv
>> diff --git a/gcc/testsuite/gcc.target/i386/nop-mcount.c b/gcc/testsuite/gcc.target/i386/nop-mcount.c
>> new file mode 100644
>> index 0000000..2592231
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/nop-mcount.c
>> @@ -0,0 +1,24 @@
>> +/* Test -mnop-mcount */
>> +/* { dg-do compile } */
>> +/* { dg-options "-pg -mfentry -mrecord-mcount -mnop-mcount" } */
>> +/* { dg-final { scan-assembler-not "__fentry__" } } */
>> +/* Origin: Andi Kleen */
>> +extern void foobar(char *);
>> +
>> +void func(void)
>> +{
>> +  foobar ("Hello world\n");
>> +}
>> +
>> +void func2(void)
>> +{
>> +  int i;
>> +  for (i = 0; i < 10; i++)
>> +    foobar ("Hello world");
>> +}
>> +
>> +void func3(a)
>> +char *a;
>> +{
>> +  foobar("Hello world");
>> +}
>> diff --git a/gcc/testsuite/gcc.target/i386/record-mcount.c b/gcc/testsuite/gcc.target/i386/record-mcount.c
>> new file mode 100644
>> index 0000000..dae413e
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/record-mcount.c
>> @@ -0,0 +1,24 @@
>> +/* Test -mrecord-mcount */
>> +/* { dg-do compile } */
>> +/* { dg-options "-pg -mrecord-mcount" } */
>> +/* { dg-final { scan-assembler "mcount_loc" } } */
>> +/* Origin: Andi Kleen */
>> +extern void foobar(char *);
>> +
>> +void func(void)
>> +{
>> +  foobar ("Hello world\n");
>> +}
>> +
>> +void func2(void)
>> +{
>> +  int i;
>> +  for (i = 0; i < 10; i++)
>> +    foobar ("Hello world");
>> +}
>> +
>> +void func3(a)
>> +char *a;
>> +{
>> +  foobar("Hello world");
>> +}

-- 
ak@linux.intel.com -- Speaking for myself only


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]