This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Make function clone name numbering independent.


On 2018-07-26 01:27 PM, Michael Ploujnikov wrote:
> On 2018-07-24 09:57 AM, Michael Ploujnikov wrote:
>> On 2018-07-20 06:05 AM, Richard Biener wrote:
>>>>  /* Return a new assembler name for a clone with SUFFIX of a decl named
>>>>     NAME.  */
>>>> @@ -521,14 +521,13 @@ tree
>>>>  clone_function_name_1 (const char *name, const char *suffix)
>>>
>>> pass this function the counter to use....
>>>
>>>>  {
>>>>    size_t len = strlen (name);
>>>> -  char *tmp_name, *prefix;
>>>> +  char *prefix;
>>>>
>>>>    prefix = XALLOCAVEC (char, len + strlen (suffix) + 2);
>>>>    memcpy (prefix, name, len);
>>>>    strcpy (prefix + len + 1, suffix);
>>>>    prefix[len] = symbol_table::symbol_suffix_separator ();
>>>> -  ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++);
>>>
>>> and keep using ASM_FORMAT_PRIVATE_NAME here.  You need to change
>>> the lto/lto-partition.c caller (just use zero as counter).
>>>
>>>> -  return get_identifier (tmp_name);
>>>> +  return get_identifier (prefix);
>>>>  }
>>>>
>>>>  /* Return a new assembler name for a clone of DECL with SUFFIX.  */
>>>> @@ -537,7 +536,17 @@ tree
>>>>  clone_function_name (tree decl, const char *suffix)
>>>>  {
>>>>    tree name = DECL_ASSEMBLER_NAME (decl);
>>>> -  return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
>>>> +  const char *decl_name = IDENTIFIER_POINTER (name);
>>>> +  char *numbered_name;
>>>> +  unsigned int *suffix_counter;
>>>> +  if (!clone_fn_ids) {
>>>> +    /* Initialize the per-function counter hash table if this is the first call */
>>>> +    clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64);
>>>> +  }
>>>
>>> I still do not like throwing memory at the problem in this way for the
>>> little benefit
>>> this change provides :/
>>>
>>> So no approval from me at this point...
>>>
>>> Richard.
>>
>> Can you give me an idea of the memory constraints that are involved?
>>
>> The highest memory usage increase that I could find was when compiling
>> a source file (from Linux) with roughly 10,000 functions. It showed a 2kB
>> increase over the before-patch use of 6936kB which is barely 0.03%.
>>
>> Using a single counter can result in more confusing namespacing when
>> you have .bar.clone.4 despite there only being 3 clones of .bar.
>>
>> From a practical point of view this change is helpful to anyone
>> diffing binary output such as forensic analysts, Debian Reproducible
>> Builds or even someone validating compiler output (before and after an input
>> source patch). The extra changes that this patch alleviates are a
>> distraction and could even be misleading. For example, applying a
>> source patch to the same Linux source produces the following binary
>> diff before my change:
>>
>> --- /tmp/output.o.objdump
>> +++ /tmp/patched-output.o.objdump
>> @@ -1,5 +1,5 @@
>>
>> -/tmp/uverbs_cmd/output.o:     file format elf32-i386
>> +/tmp/uverbs_cmd/patched-output.o:     file format elf32-i386
>>
>>
>>  Disassembly of section .text.get_order:
>> @@ -265,12 +265,12 @@
>>     3:	e9 fc ff ff ff       	jmp    4 <put_cq_read+0x4>
>>  			4: R_386_PC32	.text.put_uobj_read
>>
>> -Disassembly of section .text.trace_kmalloc.constprop.3:
>> +Disassembly of section .text.trace_kmalloc.constprop.4:
>>
>> -00000000 <trace_kmalloc.constprop.3>:
>> +00000000 <trace_kmalloc.constprop.4>:
>>     0:	83 3d 04 00 00 00 00 	cmpl   $0x0,0x4
>>  			2: R_386_32	__tracepoint_kmalloc
>> -   7:	74 34                	je     3d <trace_kmalloc.constprop.3+0x3d>
>> +   7:	74 34                	je     3d <trace_kmalloc.constprop.4+0x3d>
>>     9:	55                   	push   %ebp
>>     a:	89 cd                	mov    %ecx,%ebp
>>     c:	57                   	push   %edi
>> @@ -281,7 +281,7 @@
>>    13:	8b 1d 10 00 00 00    	mov    0x10,%ebx
>>  			15: R_386_32	__tracepoint_kmalloc
>>    19:	85 db                	test   %ebx,%ebx
>> -  1b:	74 1b                	je     38 <trace_kmalloc.constprop.3+0x38>
>> +  1b:	74 1b                	je     38 <trace_kmalloc.constprop.4+0x38>
>>    1d:	68 d0 00 00 00       	push   $0xd0
>>    22:	89 fa                	mov    %edi,%edx
>>    24:	89 f0                	mov    %esi,%eax
>> @@ -292,7 +292,7 @@
>>    31:	58                   	pop    %eax
>>    32:	83 3b 00             	cmpl   $0x0,(%ebx)
>>    35:	5a                   	pop    %edx
>> -  36:	eb e3                	jmp    1b <trace_kmalloc.constprop.3+0x1b>
>> +  36:	eb e3                	jmp    1b <trace_kmalloc.constprop.4+0x1b>
>>    38:	5b                   	pop    %ebx
>>    39:	5e                   	pop    %esi
>>    3a:	5f                   	pop    %edi
>> @@ -846,7 +846,7 @@
>>    78:	b8 5f 00 00 00       	mov    $0x5f,%eax
>>  			79: R_386_32	.text.ib_uverbs_alloc_pd
>>    7d:	e8 fc ff ff ff       	call   7e <ib_uverbs_alloc_pd+0x7e>
>> -			7e: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			7e: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    82:	c7 45 d4 f4 ff ff ff 	movl   $0xfffffff4,-0x2c(%ebp)
>>    89:	59                   	pop    %ecx
>>    8a:	85 db                	test   %ebx,%ebx
>> @@ -1068,7 +1068,7 @@
>>    9c:	b8 83 00 00 00       	mov    $0x83,%eax
>>  			9d: R_386_32	.text.ib_uverbs_reg_mr
>>    a1:	e8 fc ff ff ff       	call   a2 <ib_uverbs_reg_mr+0xa2>
>> -			a2: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			a2: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    a6:	ba f4 ff ff ff       	mov    $0xfffffff4,%edx
>>    ab:	58                   	pop    %eax
>>    ac:	85 db                	test   %ebx,%ebx
>> @@ -1385,7 +1385,7 @@
>>    99:	b8 7b 00 00 00       	mov    $0x7b,%eax
>>  			9a: R_386_32	.text.ib_uverbs_create_cq
>>    9e:	e8 fc ff ff ff       	call   9f <ib_uverbs_create_cq+0x9f>
>> -			9f: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			9f: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    a3:	58                   	pop    %eax
>>    a4:	85 db                	test   %ebx,%ebx
>>    a6:	75 0a                	jne    b2 <ib_uverbs_create_cq+0xb2>
>> @@ -1607,129 +1607,107 @@
>>  00000000 <ib_uverbs_poll_cq>:
>>     0:	55                   	push   %ebp
>>     1:	57                   	push   %edi
>> -   2:	89 c7                	mov    %eax,%edi
>> -   4:	56                   	push   %esi
>> -   5:	89 ce                	mov    %ecx,%esi
>> -   7:	b9 10 00 00 00       	mov    $0x10,%ecx
>> -   c:	53                   	push   %ebx
>> -   d:	83 ec 18             	sub    $0x18,%esp
>> -  10:	8d 44 24 08          	lea    0x8(%esp),%eax
>> -  14:	e8 fc ff ff ff       	call   15 <ib_uverbs_poll_cq+0x15>
>> -			15: R_386_PC32	copy_from_user
>> -  19:	85 c0                	test   %eax,%eax
>> -  1b:	0f 85 34 01 00 00    	jne    155 <ib_uverbs_poll_cq+0x155>
>> -  21:	6b 44 24 14 34       	imul   $0x34,0x14(%esp),%eax
>> -  26:	ba d0 00 00 00       	mov    $0xd0,%edx
>> -  2b:	e8 fc ff ff ff       	call   2c <ib_uverbs_poll_cq+0x2c>
>> -			2c: R_386_PC32	__kmalloc
>> -  30:	89 c5                	mov    %eax,%ebp
>> -  32:	85 c0                	test   %eax,%eax
>> -  34:	0f 84 22 01 00 00    	je     15c <ib_uverbs_poll_cq+0x15c>
>> -  3a:	6b 44 24 14 30       	imul   $0x30,0x14(%esp),%eax
>> -  3f:	ba d0 00 00 00       	mov    $0xd0,%edx
>> -  44:	83 c0 08             	add    $0x8,%eax
>> -  47:	89 44 24 04          	mov    %eax,0x4(%esp)
>> -  4b:	e8 fc ff ff ff       	call   4c <ib_uverbs_poll_cq+0x4c>
>> -			4c: R_386_PC32	__kmalloc
>> -  50:	ba f4 ff ff ff       	mov    $0xfffffff4,%edx
>> -  55:	89 04 24             	mov    %eax,(%esp)
>> -  58:	85 c0                	test   %eax,%eax
>> -  5a:	0f 84 e1 00 00 00    	je     141 <ib_uverbs_poll_cq+0x141>
>> -  60:	8b 4f 58             	mov    0x58(%edi),%ecx
>> -  63:	6a 00                	push   $0x0
>> -  65:	b8 00 00 00 00       	mov    $0x0,%eax
>> -			66: R_386_32	ib_uverbs_cq_idr
>> -  6a:	8b 54 24 14          	mov    0x14(%esp),%edx
>> -  6e:	e8 fc ff ff ff       	call   6f <ib_uverbs_poll_cq+0x6f>
>> -			6f: R_386_PC32	.text.idr_read_obj
>> -  73:	ba ea ff ff ff       	mov    $0xffffffea,%edx
>> -  78:	89 c7                	mov    %eax,%edi
>> -  7a:	58                   	pop    %eax
>> -  7b:	85 ff                	test   %edi,%edi
>> -  7d:	0f 84 ae 00 00 00    	je     131 <ib_uverbs_poll_cq+0x131>
>> -  83:	8b 1f                	mov    (%edi),%ebx
>> -  85:	8b 54 24 14          	mov    0x14(%esp),%edx
>> -  89:	89 e9                	mov    %ebp,%ecx
>> -  8b:	89 f8                	mov    %edi,%eax
>> -  8d:	ff 93 70 01 00 00    	call   *0x170(%ebx)
>> -  93:	8b 1c 24             	mov    (%esp),%ebx
>> -  96:	89 03                	mov    %eax,(%ebx)
>> -  98:	89 f8                	mov    %edi,%eax
>> -  9a:	e8 fc ff ff ff       	call   9b <ib_uverbs_poll_cq+0x9b>
>> -			9b: R_386_PC32	.text.put_cq_read
>> -  9f:	8b 1c 24             	mov    (%esp),%ebx
>> -  a2:	89 e8                	mov    %ebp,%eax
>> -  a4:	6b 3b 34             	imul   $0x34,(%ebx),%edi
>> -  a7:	8d 53 08             	lea    0x8(%ebx),%edx
>> -  aa:	01 ef                	add    %ebp,%edi
>> -  ac:	39 f8                	cmp    %edi,%eax
>> -  ae:	74 67                	je     117 <ib_uverbs_poll_cq+0x117>
>> -  b0:	8b 08                	mov    (%eax),%ecx
>> -  b2:	8b 58 04             	mov    0x4(%eax),%ebx
>> -  b5:	83 c2 30             	add    $0x30,%edx
>> -  b8:	83 c0 34             	add    $0x34,%eax
>> -  bb:	89 4a d0             	mov    %ecx,-0x30(%edx)
>> -  be:	89 5a d4             	mov    %ebx,-0x2c(%edx)
>> -  c1:	8b 48 d4             	mov    -0x2c(%eax),%ecx
>> -  c4:	89 4a d8             	mov    %ecx,-0x28(%edx)
>> -  c7:	8b 48 d8             	mov    -0x28(%eax),%ecx
>> -  ca:	89 4a dc             	mov    %ecx,-0x24(%edx)
>> -  cd:	8b 48 dc             	mov    -0x24(%eax),%ecx
>> -  d0:	89 4a e0             	mov    %ecx,-0x20(%edx)
>> -  d3:	8b 48 e0             	mov    -0x20(%eax),%ecx
>> -  d6:	89 4a e4             	mov    %ecx,-0x1c(%edx)
>> -  d9:	8b 48 e8             	mov    -0x18(%eax),%ecx
>> -  dc:	89 4a e8             	mov    %ecx,-0x18(%edx)
>> -  df:	8b 48 e4             	mov    -0x1c(%eax),%ecx
>> -  e2:	8b 49 20             	mov    0x20(%ecx),%ecx
>> -  e5:	89 4a ec             	mov    %ecx,-0x14(%edx)
>> -  e8:	8b 48 ec             	mov    -0x14(%eax),%ecx
>> -  eb:	89 4a f0             	mov    %ecx,-0x10(%edx)
>> -  ee:	8b 48 f0             	mov    -0x10(%eax),%ecx
>> -  f1:	89 4a f4             	mov    %ecx,-0xc(%edx)
>> -  f4:	8b 48 f4             	mov    -0xc(%eax),%ecx
>> -  f7:	66 89 4a f8          	mov    %cx,-0x8(%edx)
>> -  fb:	66 8b 48 f6          	mov    -0xa(%eax),%cx
>> -  ff:	66 89 4a fa          	mov    %cx,-0x6(%edx)
>> - 103:	8a 48 f8             	mov    -0x8(%eax),%cl
>> - 106:	88 4a fc             	mov    %cl,-0x4(%edx)
>> - 109:	8a 48 f9             	mov    -0x7(%eax),%cl
>> - 10c:	88 4a fd             	mov    %cl,-0x3(%edx)
>> - 10f:	8a 48 fa             	mov    -0x6(%eax),%cl
>> - 112:	88 4a fe             	mov    %cl,-0x2(%edx)
>> - 115:	eb 95                	jmp    ac <ib_uverbs_poll_cq+0xac>
>> - 117:	8b 14 24             	mov    (%esp),%edx
>> - 11a:	8b 4c 24 04          	mov    0x4(%esp),%ecx
>> - 11e:	8b 44 24 08          	mov    0x8(%esp),%eax
>> - 122:	e8 fc ff ff ff       	call   123 <ib_uverbs_poll_cq+0x123>
>> -			123: R_386_PC32	copy_to_user
>> - 127:	83 f8 01             	cmp    $0x1,%eax
>> - 12a:	19 d2                	sbb    %edx,%edx
>> - 12c:	f7 d2                	not    %edx
>> - 12e:	83 e2 f2             	and    $0xfffffff2,%edx
>> - 131:	8b 04 24             	mov    (%esp),%eax
>> - 134:	89 54 24 04          	mov    %edx,0x4(%esp)
>> - 138:	e8 fc ff ff ff       	call   139 <ib_uverbs_poll_cq+0x139>
>> -			139: R_386_PC32	kfree
>> - 13d:	8b 54 24 04          	mov    0x4(%esp),%edx
>> - 141:	89 e8                	mov    %ebp,%eax
>> - 143:	89 14 24             	mov    %edx,(%esp)
>> - 146:	e8 fc ff ff ff       	call   147 <ib_uverbs_poll_cq+0x147>
>> -			147: R_386_PC32	kfree
>> - 14b:	8b 14 24             	mov    (%esp),%edx
>> - 14e:	85 d2                	test   %edx,%edx
>> - 150:	0f 45 f2             	cmovne %edx,%esi
>> - 153:	eb 0c                	jmp    161 <ib_uverbs_poll_cq+0x161>
>> - 155:	be f2 ff ff ff       	mov    $0xfffffff2,%esi
>> - 15a:	eb 05                	jmp    161 <ib_uverbs_poll_cq+0x161>
>> - 15c:	be f4 ff ff ff       	mov    $0xfffffff4,%esi
>> - 161:	83 c4 18             	add    $0x18,%esp
>> - 164:	89 f0                	mov    %esi,%eax
>> - 166:	5b                   	pop    %ebx
>> - 167:	5e                   	pop    %esi
>> - 168:	5f                   	pop    %edi
>> - 169:	5d                   	pop    %ebp
>> - 16a:	c3                   	ret
>> +   2:	bf f2 ff ff ff       	mov    $0xfffffff2,%edi
>> +   7:	56                   	push   %esi
>> +   8:	53                   	push   %ebx
>> +   9:	89 c3                	mov    %eax,%ebx
>> +   b:	81 ec 84 00 00 00    	sub    $0x84,%esp
>> +  11:	89 4c 24 04          	mov    %ecx,0x4(%esp)
>> +  15:	8d 44 24 10          	lea    0x10(%esp),%eax
>> +  19:	b9 10 00 00 00       	mov    $0x10,%ecx
>> +  1e:	e8 fc ff ff ff       	call   1f <ib_uverbs_poll_cq+0x1f>
>> +			1f: R_386_PC32	copy_from_user
>> +  23:	89 04 24             	mov    %eax,(%esp)
>> +  26:	85 c0                	test   %eax,%eax
>> +  28:	0f 85 1f 01 00 00    	jne    14d <ib_uverbs_poll_cq+0x14d>
>> +  2e:	8b 4b 58             	mov    0x58(%ebx),%ecx
>> +  31:	6a 00                	push   $0x0
>> +  33:	b8 00 00 00 00       	mov    $0x0,%eax
>> +			34: R_386_32	ib_uverbs_cq_idr
>> +  38:	bf ea ff ff ff       	mov    $0xffffffea,%edi
>> +  3d:	8b 54 24 1c          	mov    0x1c(%esp),%edx
>> +  41:	e8 fc ff ff ff       	call   42 <ib_uverbs_poll_cq+0x42>
>> +			42: R_386_PC32	.text.idr_read_obj
>> +  46:	89 c3                	mov    %eax,%ebx
>> +  48:	58                   	pop    %eax
>> +  49:	85 db                	test   %ebx,%ebx
>> +  4b:	0f 84 fc 00 00 00    	je     14d <ib_uverbs_poll_cq+0x14d>
>> +  51:	8b 6c 24 10          	mov    0x10(%esp),%ebp
>> +  55:	b9 02 00 00 00       	mov    $0x2,%ecx
>> +  5a:	8d 7c 24 08          	lea    0x8(%esp),%edi
>> +  5e:	8b 04 24             	mov    (%esp),%eax
>> +  61:	8d 75 08             	lea    0x8(%ebp),%esi
>> +  64:	f3 ab                	rep stos %eax,%es:(%edi)
>> +  66:	8b 44 24 1c          	mov    0x1c(%esp),%eax
>> +  6a:	39 44 24 08          	cmp    %eax,0x8(%esp)
>> +  6e:	73 1f                	jae    8f <ib_uverbs_poll_cq+0x8f>
>> +  70:	8b 3b                	mov    (%ebx),%edi
>> +  72:	8d 4c 24 50          	lea    0x50(%esp),%ecx
>> +  76:	ba 01 00 00 00       	mov    $0x1,%edx
>> +  7b:	89 d8                	mov    %ebx,%eax
>> +  7d:	ff 97 70 01 00 00    	call   *0x170(%edi)
>> +  83:	89 c7                	mov    %eax,%edi
>> +  85:	85 c0                	test   %eax,%eax
>> +  87:	0f 88 b9 00 00 00    	js     146 <ib_uverbs_poll_cq+0x146>
>> +  8d:	75 21                	jne    b0 <ib_uverbs_poll_cq+0xb0>
>> +  8f:	b9 08 00 00 00       	mov    $0x8,%ecx
>> +  94:	8d 54 24 08          	lea    0x8(%esp),%edx
>> +  98:	89 e8                	mov    %ebp,%eax
>> +  9a:	bf f2 ff ff ff       	mov    $0xfffffff2,%edi
>> +  9f:	e8 fc ff ff ff       	call   a0 <ib_uverbs_poll_cq+0xa0>
>> +			a0: R_386_PC32	copy_to_user
>> +  a4:	85 c0                	test   %eax,%eax
>> +  a6:	0f 44 7c 24 04       	cmove  0x4(%esp),%edi
>> +  ab:	e9 96 00 00 00       	jmp    146 <ib_uverbs_poll_cq+0x146>
>> +  b0:	8b 44 24 50          	mov    0x50(%esp),%eax
>> +  b4:	8b 54 24 54          	mov    0x54(%esp),%edx
>> +  b8:	b9 30 00 00 00       	mov    $0x30,%ecx
>> +  bd:	89 44 24 20          	mov    %eax,0x20(%esp)
>> +  c1:	8b 44 24 58          	mov    0x58(%esp),%eax
>> +  c5:	89 54 24 24          	mov    %edx,0x24(%esp)
>> +  c9:	8b 54 24 74          	mov    0x74(%esp),%edx
>> +  cd:	89 44 24 28          	mov    %eax,0x28(%esp)
>> +  d1:	8b 44 24 5c          	mov    0x5c(%esp),%eax
>> +  d5:	89 44 24 2c          	mov    %eax,0x2c(%esp)
>> +  d9:	8b 44 24 60          	mov    0x60(%esp),%eax
>> +  dd:	89 44 24 30          	mov    %eax,0x30(%esp)
>> +  e1:	8b 44 24 64          	mov    0x64(%esp),%eax
>> +  e5:	89 44 24 34          	mov    %eax,0x34(%esp)
>> +  e9:	8b 44 24 6c          	mov    0x6c(%esp),%eax
>> +  ed:	89 44 24 38          	mov    %eax,0x38(%esp)
>> +  f1:	8b 44 24 68          	mov    0x68(%esp),%eax
>> +  f5:	8b 40 20             	mov    0x20(%eax),%eax
>> +  f8:	89 54 24 44          	mov    %edx,0x44(%esp)
>> +  fc:	8d 54 24 20          	lea    0x20(%esp),%edx
>> + 100:	c6 44 24 4f 00       	movb   $0x0,0x4f(%esp)
>> + 105:	89 44 24 3c          	mov    %eax,0x3c(%esp)
>> + 109:	8b 44 24 70          	mov    0x70(%esp),%eax
>> + 10d:	89 44 24 40          	mov    %eax,0x40(%esp)
>> + 111:	8b 44 24 78          	mov    0x78(%esp),%eax
>> + 115:	89 44 24 48          	mov    %eax,0x48(%esp)
>> + 119:	8b 44 24 7c          	mov    0x7c(%esp),%eax
>> + 11d:	66 89 44 24 4c       	mov    %ax,0x4c(%esp)
>> + 122:	8a 44 24 7e          	mov    0x7e(%esp),%al
>> + 126:	88 44 24 4e          	mov    %al,0x4e(%esp)
>> + 12a:	89 f0                	mov    %esi,%eax
>> + 12c:	e8 fc ff ff ff       	call   12d <ib_uverbs_poll_cq+0x12d>
>> +			12d: R_386_PC32	copy_to_user
>> + 131:	85 c0                	test   %eax,%eax
>> + 133:	75 0c                	jne    141 <ib_uverbs_poll_cq+0x141>
>> + 135:	83 c6 30             	add    $0x30,%esi
>> + 138:	ff 44 24 08          	incl   0x8(%esp)
>> + 13c:	e9 25 ff ff ff       	jmp    66 <ib_uverbs_poll_cq+0x66>
>> + 141:	bf f2 ff ff ff       	mov    $0xfffffff2,%edi
>> + 146:	89 d8                	mov    %ebx,%eax
>> + 148:	e8 fc ff ff ff       	call   149 <ib_uverbs_poll_cq+0x149>
>> +			149: R_386_PC32	.text.put_cq_read
>> + 14d:	81 c4 84 00 00 00    	add    $0x84,%esp
>> + 153:	89 f8                	mov    %edi,%eax
>> + 155:	5b                   	pop    %ebx
>> + 156:	5e                   	pop    %esi
>> + 157:	5f                   	pop    %edi
>> + 158:	5d                   	pop    %ebp
>> + 159:	c3                   	ret
>>
>>  Disassembly of section .text.ib_uverbs_req_notify_cq:
>>
>> @@ -1915,7 +1893,7 @@
>>    94:	b8 7b 00 00 00       	mov    $0x7b,%eax
>>  			95: R_386_32	.text.ib_uverbs_create_qp
>>    99:	e8 fc ff ff ff       	call   9a <ib_uverbs_create_qp+0x9a>
>> -			9a: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			9a: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    9e:	59                   	pop    %ecx
>>    9f:	c7 85 50 ff ff ff f4 	movl   $0xfffffff4,-0xb0(%ebp)
>>    a6:	ff ff ff
>> @@ -2241,7 +2219,7 @@
>>    68:	b8 4f 00 00 00       	mov    $0x4f,%eax
>>  			69: R_386_32	.text.ib_uverbs_query_qp
>>    6d:	e8 fc ff ff ff       	call   6e <ib_uverbs_query_qp+0x6e>
>> -			6e: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			6e: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    72:	59                   	pop    %ecx
>>    73:	c7 85 5c ff ff ff 10 	movl   $0x10,-0xa4(%ebp)
>>    7a:	00 00 00
>> @@ -2260,7 +2238,7 @@
>>    a3:	b8 86 00 00 00       	mov    $0x86,%eax
>>  			a4: R_386_32	.text.ib_uverbs_query_qp
>>    a8:	e8 fc ff ff ff       	call   a9 <ib_uverbs_query_qp+0xa9>
>> -			a9: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			a9: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    ad:	5a                   	pop    %edx
>>    ae:	85 db                	test   %ebx,%ebx
>>    b0:	0f 84 c1 01 00 00    	je     277 <ib_uverbs_query_qp+0x277>
>> @@ -2462,7 +2440,7 @@
>>    88:	b8 6f 00 00 00       	mov    $0x6f,%eax
>>  			89: R_386_32	.text.ib_uverbs_modify_qp
>>    8d:	e8 fc ff ff ff       	call   8e <ib_uverbs_modify_qp+0x8e>
>> -			8e: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			8e: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    92:	5a                   	pop    %edx
>>    93:	ba f4 ff ff ff       	mov    $0xfffffff4,%edx
>>    98:	85 db                	test   %ebx,%ebx
>> @@ -3129,7 +3107,7 @@
>>    6a:	b8 4c 00 00 00       	mov    $0x4c,%eax
>>  			6b: R_386_32	.text.ib_uverbs_create_ah
>>    6f:	e8 fc ff ff ff       	call   70 <ib_uverbs_create_ah+0x70>
>> -			70: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			70: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    74:	58                   	pop    %eax
>>    75:	85 db                	test   %ebx,%ebx
>>    77:	75 0a                	jne    83 <ib_uverbs_create_ah+0x83>
>> @@ -3396,7 +3374,7 @@
>>    af:	b8 91 00 00 00       	mov    $0x91,%eax
>>  			b0: R_386_32	.text.ib_uverbs_attach_mcast
>>    b4:	e8 fc ff ff ff       	call   b5 <ib_uverbs_attach_mcast+0xb5>
>> -			b5: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			b5: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    b9:	58                   	pop    %eax
>>    ba:	85 db                	test   %ebx,%ebx
>>    bc:	75 07                	jne    c5 <ib_uverbs_attach_mcast+0xc5>
>> @@ -3572,7 +3550,7 @@
>>    77:	b8 5e 00 00 00       	mov    $0x5e,%eax
>>  			78: R_386_32	.text.ib_uverbs_create_srq
>>    7c:	e8 fc ff ff ff       	call   7d <ib_uverbs_create_srq+0x7d>
>> -			7d: R_386_PC32	.text.trace_kmalloc.constprop.3
>> +			7d: R_386_PC32	.text.trace_kmalloc.constprop.4
>>    81:	ba f4 ff ff ff       	mov    $0xfffffff4,%edx
>>    86:	58                   	pop    %eax
>>    87:	85 db                	test   %ebx,%ebx
>>
>> Needless to say, after my change the diff only shows the truly changed
>> functions.
>>
>> - Michael
>>
> 
> Updated patch
> 
> 
> gcc:
> 2018-07-26  Michael Ploujnikov  <michael.ploujnikov@oracle.com>
> 
>        Make function clone name numbering independent.
>        * cgraph.h (clone_function_name_1): Add clone_num argument
>        * cgraphclones.c: Replace clone_fn_id_num with clone_fn_ids.
>        (clone_function_name): Use counters from the hashtable.
> 
> lto:
> 2018-07-26  Michael Ploujnikov  <michael.ploujnikov@oracle.com>
> 
>        * lto-partition.c (privatize_symbol_name_1): Pass 0 to
>        clone_function_name_1.
> 
> 
> testsuite:
> 2018-07-26  Michael Ploujnikov  <michael.ploujnikov@oracle.com>
> 
> 	Clone numbering should be independent for each function.
> 	* gcc/testsuite/gcc.dg/independent-cloneids-1.c: New test.
> 
> ---
>  gcc/cgraph.h                                  |  2 +-
>  gcc/cgraphclones.c                            | 16 ++++++++---
>  gcc/lto/lto-partition.c                       |  2 +-
>  gcc/testsuite/gcc.dg/independent-cloneids-1.c | 38 +++++++++++++++++++++++++++
>  4 files changed, 52 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/independent-cloneids-1.c
> 
> diff --git gcc/cgraph.h gcc/cgraph.h
> index a8b1b4c..fe44bd0 100644
> --- gcc/cgraph.h
> +++ gcc/cgraph.h
> @@ -2368,7 +2368,7 @@ basic_block init_lowered_empty_function (tree, bool,
> profile_count);
>  tree thunk_adjust (gimple_stmt_iterator *, tree, bool, HOST_WIDE_INT, tree);
>  /* In cgraphclones.c  */
> 
> -tree clone_function_name_1 (const char *, const char *);
> +tree clone_function_name_1 (const char *, const char *, unsigned int);
>  tree clone_function_name (tree decl, const char *);
> 
>  void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *,
> diff --git gcc/cgraphclones.c gcc/cgraphclones.c
> index 6e84a31..7de7a65 100644
> --- gcc/cgraphclones.c
> +++ gcc/cgraphclones.c
> @@ -512,13 +512,13 @@ cgraph_node::create_clone (tree new_decl, profile_count
> prof_count,
>    return new_node;
>  }
> 
> -static GTY(()) unsigned int clone_fn_id_num;
> +static GTY(()) hash_map<const char *, unsigned> *clone_fn_ids;
> 
>  /* Return a new assembler name for a clone with SUFFIX of a decl named
>     NAME.  */
> 
>  tree
> -clone_function_name_1 (const char *name, const char *suffix)
> +clone_function_name_1 (const char *name, const char *suffix, unsigned int
> clone_num)
>  {
>    size_t len = strlen (name);
>    char *tmp_name, *prefix;
> @@ -527,7 +527,7 @@ clone_function_name_1 (const char *name, const char *suffix)
>    memcpy (prefix, name, len);
>    strcpy (prefix + len + 1, suffix);
>    prefix[len] = symbol_table::symbol_suffix_separator ();
> -  ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++);
> +  ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_num);
>    return get_identifier (tmp_name);
>  }
> 
> @@ -537,7 +537,15 @@ tree
>  clone_function_name (tree decl, const char *suffix)
>  {
>    tree name = DECL_ASSEMBLER_NAME (decl);
> -  return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
> +  const char *decl_name = IDENTIFIER_POINTER (name);
> +  unsigned int *suffix_counter;
> +  if (!clone_fn_ids) {
> +    /* Initialize the per-function counter hash table on first call */
> +    clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64);
> +  }
> +  suffix_counter = &clone_fn_ids->get_or_insert(decl_name);
> +  *suffix_counter = *suffix_counter + 1;
> +  return clone_function_name_1 (decl_name, suffix, *suffix_counter);
>  }
> 
> 
> diff --git gcc/lto/lto-partition.c gcc/lto/lto-partition.c
> index c7a5710..4b15232 100644
> --- gcc/lto/lto-partition.c
> +++ gcc/lto/lto-partition.c
> @@ -965,7 +965,7 @@ privatize_symbol_name_1 (symtab_node *node, tree decl)
>    name = maybe_rewrite_identifier (name);
>    symtab->change_decl_assembler_name (decl,
>  				      clone_function_name_1 (name,
> -							     "lto_priv"));
> +							     "lto_priv", 0));
> 
>    if (node->lto_file_data)
>      lto_record_renamed_decl (node->lto_file_data, name,
> diff --git gcc/testsuite/gcc.dg/independent-cloneids-1.c
> gcc/testsuite/gcc.dg/independent-cloneids-1.c
> new file mode 100644
> index 0000000..d723e20
> --- /dev/null
> +++ gcc/testsuite/gcc.dg/independent-cloneids-1.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fipa-cp -fipa-cp-clone -fdump-ipa-cp"  } */
> +
> +extern int printf (const char *, ...);
> +
> +static int __attribute__ ((noinline))
> +foo (int arg)
> +{
> +  return 7 * arg;
> +}
> +
> +static int __attribute__ ((noinline))
> +bar (int arg)
> +{
> +  return arg * arg;
> +}
> +
> +int
> +baz (int arg)
> +{
> +  printf("%d\n", bar (3));
> +  printf("%d\n", bar (4));
> +  printf("%d\n", foo (5));
> +  printf("%d\n", foo (6));
> +  /* adding or removing the following call should not affect foo
> +     function's clone numbering */
> +  printf("%d\n", bar (7));
> +  return foo (8);
> +}
> +
> +/* { dg-final { scan-ipa-dump "Function bar.constprop.0" "cp" } } */
> +/* { dg-final { scan-ipa-dump "Function bar.constprop.1" "cp" } } */
> +/* { dg-final { scan-ipa-dump "Function bar.constprop.3" "cp" } } */
> +/* { dg-final { scan-ipa-dump "Function foo.constprop.0" "cp" } } */
> +/* { dg-final { scan-ipa-dump "Function foo.constprop.1" "cp" } } */
> +/* { dg-final { scan-ipa-dump "Function foo.constprop.2" "cp" } } */
> +/* { dg-final { scan-ipa-dump-not "Function foo.constprop.3" "cp" } } */
> +/* { dg-final { scan-ipa-dump-not "Function foo.constprop.4" "cp" } } */
> 

Ping? Could someone else please take a look at this since Richard appears to be
overloaded at the moment?


Regards,
- Michael

Attachment: signature.asc
Description: OpenPGP digital signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]