Patch for IA64 performance (inline division)

Steve Ellcey sje@cup.hp.com
Thu Feb 26 23:54:00 GMT 2004


I would like to propose turning on -finline-float-divide-max-throughput
by default on IA64.  I have tested it on HP-UX and Linux with no
regressions and I also ran the four C floating-point SPEC programs on HP-UX
in 64-bit mode to test the performance difference.

The largest performance improvement was in 179.art which had a 35%
improvement with -O2 and 20% with -O3.  The other tests improved from 0%
to 4% with the exception of 188.ammp which slowed down by 1.2% at -O2
(but improved by 0.5% at -O3).  I also tried the min-latency version of
inlining; it was not as good as the max-throughput version except on
188.ammp, where it slowed things down a little less at -O2 and sped up a
little more at -O3.

The size increase for using inline division ranged from 1.4% to 15%
(1.4% 177.mesa, 7.6% 179.art, 6.4% 183.equake, 15% 188.ammp).

As a side note, the HP IA64 compiler always generates inline code for
floating point division.

In addition to adding MASK_INLINE_FLOAT_DIV_THR to TARGET_DEFAULT, the
following patch adds some options to turn off inlining of division and
sqrt, and changes the option checking to differentiate between the
options being on implicitly or explicitly in order to do the appropriate
warnings regardless of what the default is.

Steve Ellcey
sje@cup.hp.com


2004-02-26  Steve Ellcey  <sje@cup.hp.com>

	* config/ia64/ia64.h (no-inline-float-divide): New option.
	* config/ia64/ia64.h (no-inline-int-divide): New option.
	* config/ia64/ia64.h (no-inline-sqrt): New option.
	(TARGET_DEFAULT): Add MASK_INLINE_FLOAT_DIV_THR to define.
	* config/ia64/hpux.h (TARGET_DEFAULT): Ditto.
	* config/ia64/ia64.c (ia64_override_options): Modify error
	checking for inlined division/sqrt.


*** gcc.orig/gcc/gcc/config/ia64/ia64.h	Thu Feb 26 09:56:23 2004
--- gcc/gcc/gcc/config/ia64/ia64.h	Thu Feb 26 14:39:22 2004
*************** extern int ia64_tls_size;
*** 203,216 ****
--- 203,223 ----
        N_("Generate inline floating point division, optimize for latency") },\
    { "inline-float-divide-max-throughput", MASK_INLINE_FLOAT_DIV_THR,	\
        N_("Generate inline floating point division, optimize for throughput") },\
+   { "no-inline-float-divide", 						\
+       -(MASK_INLINE_FLOAT_DIV_LAT|MASK_INLINE_FLOAT_DIV_THR),		\
+       N_("Do not inline floating point division") },			\
    { "inline-int-divide-min-latency", MASK_INLINE_INT_DIV_LAT,		\
        N_("Generate inline integer division, optimize for latency") },	\
    { "inline-int-divide-max-throughput", MASK_INLINE_INT_DIV_THR,	\
        N_("Generate inline integer division, optimize for throughput") },\
+   { "no-inline-int-divide", -(MASK_INLINE_INT_DIV_LAT|MASK_INLINE_INT_DIV_THR),	\
+       N_("Do not inline integer division") },				\
    { "inline-sqrt-min-latency", MASK_INLINE_SQRT_LAT,			\
        N_("Generate inline square root, optimize for latency") },	\
    { "inline-sqrt-max-throughput", MASK_INLINE_SQRT_THR,			\
        N_("Generate inline square root, optimize for throughput") },     \
+   { "no-inline-sqrt", -(MASK_INLINE_SQRT_LAT|MASK_INLINE_SQRT_THR),	\
+       N_("Do not inline square root") },				\
    { "dwarf2-asm", 	MASK_DWARF2_ASM,				\
        N_("Enable Dwarf 2 line debug info via GNU as")},			\
    { "no-dwarf2-asm", 	-MASK_DWARF2_ASM,				\
*************** extern int ia64_tls_size;
*** 227,233 ****
  /* Default target_flags if no switches are specified  */
  
  #ifndef TARGET_DEFAULT
! #define TARGET_DEFAULT MASK_DWARF2_ASM
  #endif
  
  #ifndef TARGET_CPU_DEFAULT
--- 234,240 ----
  /* Default target_flags if no switches are specified  */
  
  #ifndef TARGET_DEFAULT
! #define TARGET_DEFAULT (MASK_DWARF2_ASM | MASK_INLINE_FLOAT_DIV_THR)
  #endif
  
  #ifndef TARGET_CPU_DEFAULT
*** gcc.orig/gcc/gcc/config/ia64/hpux.h	Thu Feb 26 09:56:23 2004
--- gcc/gcc/gcc/config/ia64/hpux.h	Thu Feb 26 09:55:57 2004
*************** do {							\
*** 105,111 ****
  #define JMP_BUF_SIZE  (8 * 76)
  
  #undef TARGET_DEFAULT
! #define TARGET_DEFAULT (MASK_DWARF2_ASM | MASK_BIG_ENDIAN | MASK_ILP32)
  
  /* This needs to be set to force structure arguments with a single
     field to be treated as structures and not as the type of their
--- 105,112 ----
  #define JMP_BUF_SIZE  (8 * 76)
  
  #undef TARGET_DEFAULT
! #define TARGET_DEFAULT \
!   (MASK_DWARF2_ASM | MASK_BIG_ENDIAN | MASK_ILP32 | MASK_INLINE_FLOAT_DIV_THR)
  
  /* This needs to be set to force structure arguments with a single
     field to be treated as structures and not as the type of their
*** gcc.orig/gcc/gcc/config/ia64/ia64.c	Thu Feb 26 14:45:05 2004
--- gcc/gcc/gcc/config/ia64/ia64.c	Thu Feb 26 14:45:27 2004
*************** ia64_override_options (void)
*** 4751,4770 ****
  
    if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
      {
!       warning ("cannot optimize floating point division for both latency and throughput");
!       target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
      }
  
    if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
      {
!       warning ("cannot optimize integer division for both latency and throughput");
!       target_flags &= ~MASK_INLINE_INT_DIV_THR;
      }
  
    if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
      {
!       warning ("cannot optimize square root for both latency and throughput");
!       target_flags &= ~MASK_INLINE_SQRT_THR;
      }
  
    if (TARGET_INLINE_SQRT_LAT)
--- 4751,4803 ----
  
    if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
      {
!       if ((target_flags_explicit & MASK_INLINE_FLOAT_DIV_LAT)
! 	   && (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR))
! 	{
! 	  warning ("cannot optimize floating point division for both latency and throughput");
! 	  target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
! 	}
!       else 
! 	{
! 	  if (target_flags_explicit & MASK_INLINE_FLOAT_DIV_THR)
! 	    target_flags &= ~MASK_INLINE_FLOAT_DIV_LAT;
! 	  else
! 	    target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
! 	}
      }
  
    if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
      {
!       if ((target_flags_explicit & MASK_INLINE_INT_DIV_LAT)
! 	   && (target_flags_explicit & MASK_INLINE_INT_DIV_THR))
! 	{
! 	  warning ("cannot optimize integer division for both latency and throughput");
! 	  target_flags &= ~MASK_INLINE_INT_DIV_THR;
! 	}
!       else 
! 	{
! 	  if (target_flags_explicit & MASK_INLINE_INT_DIV_THR)
! 	    target_flags &= ~MASK_INLINE_INT_DIV_LAT;
! 	  else
! 	    target_flags &= ~MASK_INLINE_INT_DIV_THR;
! 	}
      }
  
    if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
      {
!       if ((target_flags_explicit & MASK_INLINE_SQRT_LAT)
! 	   && (target_flags_explicit & MASK_INLINE_SQRT_THR))
! 	{
! 	  warning ("cannot optimize square root for both latency and throughput");
! 	  target_flags &= ~MASK_INLINE_SQRT_THR;
! 	}
!       else 
! 	{
! 	  if (target_flags_explicit & MASK_INLINE_SQRT_THR)
! 	    target_flags &= ~MASK_INLINE_SQRT_LAT;
! 	  else
! 	    target_flags &= ~MASK_INLINE_SQRT_THR;
! 	}
      }
  
    if (TARGET_INLINE_SQRT_LAT)



More information about the Gcc-patches mailing list