Bug 116333 - unused result of pure function is not optimized out because of inlining
Summary: unused result of pure function is not optimized out because of inlining
Status: RESOLVED DUPLICATE of bug 47255
Alias: None
Product: gcc
Classification: Unclassified
Component: ipa (show other bugs)
Version: 12.2.0
: P3 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: lto
Depends on:
Blocks:
 
Reported: 2024-08-11 14:01 UTC by Pali Rohár
Modified: 2024-08-14 19:13 UTC (History)
0 users

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Pali Rohár 2024-08-11 14:01:54 UTC
If a function is marked with __attribute__((pure)) and the return value of a call to it is not used at all, then gcc could optimize the call out and drop it completely.

This optimization does not happen in the example below, which uses 3 source files compiled with LTO enabled (so gcc should see the inter-file calls).


$ cat pure0.c
/* main() is declared without a prototype; it is defined in pure1.c. */
extern int main();

/*
 * Hand-written startup routine: builds a one-element, null-terminated argv
 * array and returns whatever main(1, argv) returns.
 *
 * Marked "used" so that code is emitted for it even though nothing in these
 * sources references it.
 * NOTE(review): the example is linked with -nostartfiles, so this is
 * presumably the image entry point -- confirm against the linker defaults.
 */
__attribute__((used))
int WinMainCRTStartup(void)
{
  char * argv[] = { "argv0", (char *)0 };
  return main(1, argv);
}

/*
 * Empty stub for __main. The disassembly of the noinline build shows main()
 * calling ___main; with -nostdlib this definition presumably satisfies that
 * reference.
 */
__attribute__((used))
void __main(void) {}



$ cat pure1.c
/* Flag bit that _winshowcmd() tests in STARTUPINFOA.dwFlags. */
#define STARTF_USESHOWWINDOW 0x00000001
/* Value that _winshowcmd() returns when that flag bit is not set. */
#define SW_SHOWDEFAULT 10

/*
 * Local declaration of the structure passed to GetStartupInfoA(), so that the
 * example does not need any Windows header. Only dwFlags and wShowWindow are
 * read by the code in this file.
 */
typedef struct _STARTUPINFOA {
  unsigned int cb;
  char* lpReserved;
  char* lpDesktop;
  char* lpTitle;
  unsigned int dwX;
  unsigned int dwY;
  unsigned int dwXSize;
  unsigned int dwYSize;
  unsigned int dwXCountChars;
  unsigned int dwYCountChars;
  unsigned int dwFillAttribute;
  unsigned int dwFlags;
  unsigned short wShowWindow;
  unsigned short cbReserved2;
  unsigned char* lpReserved2;
  void* hStdInput;
  void* hStdOutput;
  void* hStdError;
} STARTUPINFOA, *LPSTARTUPINFOA;

/*
 * Imported stdcall function taking a pointer to a STARTUPINFOA; exported by
 * kernel32.dll according to kernel32.def.
 */
__declspec(dllimport) extern void __stdcall GetStartupInfoA(LPSTARTUPINFOA);

/*
 * Returns StartupInfo.wShowWindow when STARTF_USESHOWWINDOW is set in
 * StartupInfo.dwFlags after the GetStartupInfoA() call, and SW_SHOWDEFAULT
 * otherwise.
 *
 * The pure attribute is the subject of this report: the value returned here
 * is passed to WinMain(), which never reads it, yet the body is not removed.
 * NOTE(review): the body calls an external function that writes through a
 * pointer; confirm that this is compatible with what the pure (or const)
 * attribute requires.
 */
__attribute__((pure))
static int _winshowcmd (void)
{
  /* Empty initializer: every member starts out zeroed before the call. */
  STARTUPINFOA StartupInfo = {};

  GetStartupInfoA(&StartupInfo);

  if (StartupInfo.dwFlags & STARTF_USESHOWWINDOW)
    return StartupInfo.wShowWindow;
  else
    return SW_SHOWDEFAULT;
}

/* Defined in pure2.c. */
extern int __stdcall WinMain(void *, void *, char *, int);

/*
 * Only its address is taken, as the first WinMain() argument.
 * NOTE(review): presumably a linker-provided symbol marking the image base --
 * confirm.
 */
extern unsigned char __ImageBase[];

/*
 * Forwards to WinMain() with &__ImageBase, a null pointer, the string "argv0"
 * and the value returned by _winshowcmd(), and returns WinMain()'s result.
 */
int main()
{
  return WinMain(&__ImageBase, (void *)0, "argv0", _winshowcmd());
}



$ cat pure2.c
/*
 * Imported stdcall function; exported by user32.dll according to user32.def.
 */
__declspec(dllimport) extern int __stdcall MessageBoxA(void *, const char *, const char *, unsigned int);

/*
 * Calls MessageBoxA() with the strings "Message" and "Title" and returns 0.
 * Every parameter is marked unused and none is read, so the showcmd value
 * that the caller computes via _winshowcmd() is never consumed.
 */
int __stdcall WinMain(void *instance __attribute__((unused)), void *prev_instance __attribute__((unused)), char *cmdln __attribute__((unused)), int showcmd __attribute__((unused))) {
  MessageBoxA((void *)0, "Message", "Title", 0);
  return 0;
}



Two additional helper files are needed to make the example self-contained, without any external dependency, and also executable on Windows:

$ cat kernel32.def
LIBRARY "kernel32.dll"
EXPORTS
GetStartupInfoA@4

$ cat user32.def
LIBRARY "user32.dll"
EXPORTS
MessageBoxA@16


Compile these sources as:

$ i686-w64-mingw32-dlltool -d kernel32.def -k -l libkernel32.a
$ i686-w64-mingw32-dlltool -d user32.def -k -l libuser32.a
$ i686-w64-mingw32-gcc pure0.c pure1.c pure2.c -o pure.exe -mwindows -nostartfiles -nostdlib libkernel32.a libuser32.a -Wl,--disable-runtime-pseudo-reloc -W -Wall -Os -flto


objdump on compiled pure.exe shows:


$ objdump -d pure.exe

pure.exe:     file format pei-i386


Disassembly of section .text:

00401000 <_WinMainCRTStartup>:
  401000:	55                   	push   %ebp
  401001:	31 c0                	xor    %eax,%eax
  401003:	b9 11 00 00 00       	mov    $0x11,%ecx
  401008:	89 e5                	mov    %esp,%ebp
  40100a:	57                   	push   %edi
  40100b:	8d 7d b4             	lea    -0x4c(%ebp),%edi
  40100e:	83 ec 64             	sub    $0x64,%esp
  401011:	f3 ab                	rep stos %eax,%es:(%edi)
  401013:	8d 45 b4             	lea    -0x4c(%ebp),%eax
  401016:	89 04 24             	mov    %eax,(%esp)
  401019:	ff 15 4c 40 40 00    	call   *0x40404c
  40101f:	31 d2                	xor    %edx,%edx
  401021:	31 c9                	xor    %ecx,%ecx
  401023:	50                   	push   %eax
  401024:	89 54 24 0c          	mov    %edx,0xc(%esp)
  401028:	c7 44 24 08 00 20 40 	movl   $0x402000,0x8(%esp)
  40102f:	00 
  401030:	c7 44 24 04 06 20 40 	movl   $0x402006,0x4(%esp)
  401037:	00 
  401038:	89 0c 24             	mov    %ecx,(%esp)
  40103b:	ff 15 54 40 40 00    	call   *0x404054
  401041:	8b 7d fc             	mov    -0x4(%ebp),%edi
  401044:	31 c0                	xor    %eax,%eax
  401046:	83 ec 10             	sub    $0x10,%esp
  401049:	c9                   	leave  
  40104a:	c3                   	ret    

0040104b <___main>:
  40104b:	c3                   	ret    

0040104c <_GetStartupInfoA@4>:
  40104c:	ff 25 4c 40 40 00    	jmp    *0x40404c
  401052:	90                   	nop
  401053:	90                   	nop

00401054 <_MessageBoxA@16>:
  401054:	ff 25 54 40 40 00    	jmp    *0x404054
  40105a:	90                   	nop
  40105b:	90                   	nop

0040105c <__CTOR_LIST__>:
  40105c:	ff                   	(bad)  
  40105d:	ff                   	(bad)  
  40105e:	ff                   	(bad)  
  40105f:	ff 00                	incl   (%eax)
  401061:	00 00                	add    %al,(%eax)
	...

00401064 <__DTOR_LIST__>:
  401064:	ff                   	(bad)  
  401065:	ff                   	(bad)  
  401066:	ff                   	(bad)  
  401067:	ff 00                	incl   (%eax)
  401069:	00 00                	add    %al,(%eax)
	...



Function GetStartupInfoA() is called only in the _winshowcmd() function, and the return value of _winshowcmd() is unused. Since _winshowcmd() is marked as pure and its return value is unused, gcc could completely optimize out the _winshowcmd() call in an LTO build. But objdump on the above example shows that gcc has not dropped the _winshowcmd() code: it is still present (inlined into _WinMainCRTStartup, together with the GetStartupInfoA() call).

Changing __attribute__((pure)) to __attribute__((const)) for _winshowcmd() function does not help gcc to optimize out the _winshowcmd() function call.
Comment 1 Andrew Pinski 2024-08-11 15:17:25 UTC
Dup.

*** This bug has been marked as a duplicate of bug 47255 ***
Comment 2 Pali Rohár 2024-08-11 16:42:09 UTC
Hello Andrew, you wrote that this function is not optimized out because of inlining. I tried marking the _winshowcmd() function with __attribute__((noinline)), but the result is similar: the function _winshowcmd() is present in the final executable even though it is not called at all.

Is it really a duplicate?

$ objdump -d pure.exe


pure.exe:     file format pei-i386


Disassembly of section .text:

00401000 <__winshowcmd>:
  401000:	55                   	push   %ebp
  401001:	31 c0                	xor    %eax,%eax
  401003:	b9 11 00 00 00       	mov    $0x11,%ecx
  401008:	89 e5                	mov    %esp,%ebp
  40100a:	57                   	push   %edi
  40100b:	8d 7d b4             	lea    -0x4c(%ebp),%edi
  40100e:	83 ec 64             	sub    $0x64,%esp
  401011:	f3 ab                	rep stos %eax,%es:(%edi)
  401013:	8d 45 b4             	lea    -0x4c(%ebp),%eax
  401016:	89 04 24             	mov    %eax,(%esp)
  401019:	ff 15 4c 30 40 00    	call   *0x40304c
  40101f:	50                   	push   %eax
  401020:	b8 0a 00 00 00       	mov    $0xa,%eax
  401025:	f6 45 e0 01          	testb  $0x1,-0x20(%ebp)
  401029:	74 04                	je     40102f <__winshowcmd+0x2f>
  40102b:	0f b7 45 e4          	movzwl -0x1c(%ebp),%eax
  40102f:	8b 7d fc             	mov    -0x4(%ebp),%edi
  401032:	c9                   	leave  
  401033:	c3                   	ret    

00401034 <___main>:
  401034:	c3                   	ret    

00401035 <_WinMainCRTStartup>:
  401035:	55                   	push   %ebp
  401036:	b8 01 00 00 00       	mov    $0x1,%eax
  40103b:	89 e5                	mov    %esp,%ebp
  40103d:	83 ec 18             	sub    $0x18,%esp
  401040:	8d 55 f0             	lea    -0x10(%ebp),%edx
  401043:	c7 45 f0 0e 20 40 00 	movl   $0x40200e,-0x10(%ebp)
  40104a:	c7 45 f4 00 00 00 00 	movl   $0x0,-0xc(%ebp)
  401051:	e8 12 00 00 00       	call   401068 <_main>
  401056:	c9                   	leave  
  401057:	c3                   	ret    

00401058 <_GetStartupInfoA@4>:
  401058:	ff 25 4c 30 40 00    	jmp    *0x40304c
  40105e:	90                   	nop
  40105f:	90                   	nop

00401060 <_MessageBoxA@16>:
  401060:	ff 25 54 30 40 00    	jmp    *0x403054
  401066:	90                   	nop
  401067:	90                   	nop

00401068 <_main>:
  401068:	57                   	push   %edi
  401069:	8d 7c 24 08          	lea    0x8(%esp),%edi
  40106d:	83 e4 f0             	and    $0xfffffff0,%esp
  401070:	ff 77 fc             	pushl  -0x4(%edi)
  401073:	55                   	push   %ebp
  401074:	89 e5                	mov    %esp,%ebp
  401076:	57                   	push   %edi
  401077:	83 ec 14             	sub    $0x14,%esp
  40107a:	e8 b5 ff ff ff       	call   401034 <___main>
  40107f:	c7 44 24 0c 00 00 00 	movl   $0x0,0xc(%esp)
  401086:	00 
  401087:	c7 44 24 08 00 20 40 	movl   $0x402000,0x8(%esp)
  40108e:	00 
  40108f:	c7 44 24 04 06 20 40 	movl   $0x402006,0x4(%esp)
  401096:	00 
  401097:	c7 04 24 00 00 00 00 	movl   $0x0,(%esp)
  40109e:	ff 15 54 30 40 00    	call   *0x403054
  4010a4:	8b 7d fc             	mov    -0x4(%ebp),%edi
  4010a7:	31 c0                	xor    %eax,%eax
  4010a9:	83 ec 10             	sub    $0x10,%esp
  4010ac:	c9                   	leave  
  4010ad:	8d 67 f8             	lea    -0x8(%edi),%esp
  4010b0:	5f                   	pop    %edi
  4010b1:	c3                   	ret    
  4010b2:	90                   	nop
  4010b3:	90                   	nop

004010b4 <__CTOR_LIST__>:
  4010b4:	ff                   	(bad)  
  4010b5:	ff                   	(bad)  
  4010b6:	ff                   	(bad)  
  4010b7:	ff 00                	incl   (%eax)
  4010b9:	00 00                	add    %al,(%eax)
	...

004010bc <__DTOR_LIST__>:
  4010bc:	ff                   	(bad)  
  4010bd:	ff                   	(bad)  
  4010be:	ff                   	(bad)  
  4010bf:	ff 00                	incl   (%eax)
  4010c1:	00 00                	add    %al,(%eax)
	...
Comment 3 Pali Rohár 2024-08-14 19:06:11 UTC
Hello Andrew, have you checked if this is really duplicate?
Comment 4 Andrew Pinski 2024-08-14 19:11:26 UTC
(In reply to Pali Rohár from comment #3)
> Hello Andrew, have you checked if this is really duplicate?

That is a different issue all together. The original issue is definitely a dup.
The secondary issue looks more like a limitation in LTO on mingw.
Comment 5 Andrew Pinski 2024-08-14 19:13:22 UTC
(In reply to Andrew Pinski from comment #4)
> (In reply to Pali Rohár from comment #3)
> > Hello Andrew, have you checked if this is really duplicate?
> 
> That is a different issue all together. The original issue is definitely a
> dup.
> The secondary issue looks more like a limitation in LTO on mingw.

Or rather a dup of bug 99373 .