This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: GCC target_clone support (functionality question)


This message is separated from the question about moving code, as it is a
question about the functionality of target_clone support.

Right now it looks like target_clone only generates the ifunc handler if there
is a call to the function in the object file.  It does not generate the ifunc
handler if there is no call.

For the default function, it generates the normal name.  This means that any
function that calls the function from a different object module will only get
the standard function.  From a library and user perspective, I think this is
wrong.  Instead the default function should be generated with a different name,
and the ifunc function should list the standard name.  Then you don't have to
change all of the other calls in the object file, the normal ifunc handling
will handle it.  It also means you can more easily put this function in a
library and automatically call the appropriate version.

Do people agree with this assessment, and should I change the code to do this
(for both x86 and ppc targets)?  If not, what is the reason for the objection?

Consider mvc5.c from the testsuite:

	/* { dg-do compile } */
	/* { dg-require-ifunc "" } */
	/* { dg-options "-fno-inline" } */
	/* { dg-final { scan-assembler-times "foo.ifunc" 6 } } */

	__attribute__((target_clones("default","avx","avx2")))
	int
	foo ()
	{
	  return 10;
	}

	__attribute__((target_clones("default","avx","avx2")))
	int
	bar ()
	{
	  return -foo ();
	}

It generates:

		.file	"mvc5.c"
		.text
		.p2align 4,,15
		.globl	foo
		.type	foo, @function
	foo:
		movl	$10, %eax
		ret

		.type	foo.avx.2, @function
	foo.avx.2:
		movl	$10, %eax
		ret

		.type	foo.avx2.3, @function
	foo.avx2.3:
		movl	$10, %eax
		ret

		.weak	foo.resolver
		.type	foo.resolver, @function
	foo.resolver:
		subq	$8, %rsp
		call	__cpu_indicator_init
		movl	__cpu_model+12(%rip), %eax
		testb	$4, %ah
		je	.L8
		movl	$foo.avx2.3, %eax
		addq	$8, %rsp
		ret
	.L8:
		testb	$2, %ah
		movl	$foo.avx.2, %edx
		movl	$foo, %eax
		cmovne	%rdx, %rax
		addq	$8, %rsp
		ret

		.type	foo.ifunc, @gnu_indirect_function
		.set	foo.ifunc,foo.resolver

	// Note these functions are not referenced

		.type	bar.avx2.1, @function
	bar.avx2.1:
		subq	$8, %rsp
		xorl	%eax, %eax
		call	foo.ifunc
		addq	$8, %rsp
		negl	%eax
		ret

		.type	bar.avx.0, @function
	bar.avx.0:
		subq	$8, %rsp
		xorl	%eax, %eax
		call	foo.ifunc
		addq	$8, %rsp
		negl	%eax
		ret

		.type	bar, @function

	// Note how it calls foo.ifunc instead of foo.

	bar:
		subq	$8, %rsp
		xorl	%eax, %eax
		call	foo.ifunc
		addq	$8, %rsp
		negl	%eax
		ret


Now, if I remove the bar call, and just leave foo it generates:

		.type	foo, @function
	foo:
		movl	$10, %eax
		ret

		.type	foo.avx.0, @function
	foo.avx.0:
		movl	$10, %eax
		ret

		.type	foo.avx2.1, @function
	foo.avx2.1:
		movl	$10, %eax
		ret

Note, it does not generate the resolver at all.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meissner@linux.vnet.ibm.com, phone: +1 (978) 899-4797


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]