i386 code label alignment patch (version 3)

John Wehle john@feith.com
Thu May 21 02:50:00 GMT 1998


[ The principal change for this version is the use of a feature
  test when checking the assembler.                             ]

This change implements the alignment of code labels following
the recommendations of the "Intel Architecture Optimization Manual".
This change seems to be worth about a 33% speed increase in the simple
case of:

  main()
    {
    int a;
    int i;

    a = 0;
    for (i = 0; i < 1000000000; i++)
      a += i;

    return a;
    }

ChangeLog:

Wed May 20 23:44:28 EDT 1998  John Wehle  (john@feith.com)

	* acconfig.h (HAVE_GAS_MAX_SKIP_P2ALIGN): New tag.
	* configure.in: Check for it.
	* i386/gas.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Use it.
	* final.c (uid_align, uid_shuid, label_align): Make static.
	(label_align): Change type to struct label_alignment pointer.
	(LABEL_TO_ALIGNMENT, shorten_branches): Update due to type change.
	(LABEL_TO_MAX_SKIP): Define.
	(LABEL_ALIGN_MAX_SKIP, LOOP_ALIGN_MAX_SKIP,
	LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Provide defaults.
	(shorten_branches): Record the maximum bytes to skip when
	aligning a label.
	(final_scan_insn): Use the maximum bytes to skip when aligning a label
	if ASM_OUTPUT_MAX_SKIP_ALIGN is available.
	* i386.h (LOOP_ALIGN_MAX_SKIP,
	LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Define.
	* i386.c (override_options): i386_align_jumps and i386_align_loops
	default to 4 if ASM_OUTPUT_MAX_SKIP_ALIGN is available.
	* invoke.texi: Document new i386 align-loops and align-jumps behavior.

Enjoy!

-- John Wehle
------------------8<------------------------8<------------------------
*** gcc/acconfig.h.ORIGINAL	Sat Apr  4 12:37:22 1998
--- gcc/acconfig.h	Wed May 20 00:30:42 1998
***************
*** 4,9 ****
--- 4,13 ----
  /* Define if your cpp understands the stringify operator.  */
  #undef HAVE_CPP_STRINGIFY
  
+ /* Define if your assembler supports specifying the maximum number
+    of bytes to skip when using the GAS .p2align command. */
+ #undef HAVE_GAS_MAX_SKIP_P2ALIGN
+ 
  /* Define if you have a working <inttypes.h> header file.  */
  #undef HAVE_INTTYPES_H
  
*** gcc/configure.in.ORIGINAL	Fri Apr 24 10:58:22 1998
--- gcc/configure.in	Wed May 20 00:32:31 1998
***************
*** 3184,3189 ****
--- 3184,3229 ----
  #	fi
  fi
  
+ # Figure out what assembler alignment features are present.
+ AC_MSG_CHECKING(assembler alignment features)
+ gcc_cv_as=
+ gcc_cv_as_alignment_features=
+ if [[ -x as$host_exeext ]]; then
+ 	# Build using assembler in the current directory.
+ 	gcc_cv_as=./as$host_exeext
+ elif [[ -f $srcdir/../gas/configure.in ]]; then
+ 	# Single tree build which includes gas.
+ 	for f in $srcdir/../gas/configure.in $srcdir/../gas/Makefile.in
+ 	do
+ 		gcc_cv_gas_version=`grep '^VERSION=[[0-9]]*\.[[0-9]]*' $f`
+ 		if [[ x$gcc_cv_gas_version != x ]]; then
+ 			break
+ 		fi
+ 	done
+ 	gcc_cv_gas_major_version=`expr "$gcc_cv_gas_version" : "VERSION=\([[0-9]]*\)"`
+ 	gcc_cv_gas_minor_version=`expr "$gcc_cv_gas_version" : "VERSION=[[0-9]]*\.\([[0-9]]*\)"`
+ 	# Gas version 2.8 and later support specifying the maximum
+ 	# bytes to skip when using .p2align.
+ 	if [[ "$gcc_cv_gas_major_version" -eq 2 -a "$gcc_cv_gas_minor_version" -ge 8 -o "$gcc_cv_gas_major_version" -gt 2 ]]; then
+ 		gcc_cv_as_alignment_features=".p2align including maximum skip"
+ 		AC_DEFINE(HAVE_GAS_MAX_SKIP_P2ALIGN)
+ 	fi
+ elif [[ x$host = x$target ]]; then
+ 	# Native build.
+ 	gcc_cv_as=as$host_exeext
+ fi
+ if [[ x$gcc_cv_as != x ]]; then
+ 	# Check if specifying the maximum bytes to skip when
+ 	# using .p2align is supported.
+ 	echo ".p2align 4,,7" > conftest.s
+ 	if $gcc_cv_as -o conftest.o conftest.s > /dev/null 2>&1; then
+ 		gcc_cv_as_alignment_features=".p2align including maximum skip"
+ 		AC_DEFINE(HAVE_GAS_MAX_SKIP_P2ALIGN)
+ 	fi
+ 	rm -f conftest.s conftest.o
+ fi
+ AC_MSG_RESULT($gcc_cv_as_alignment_features)
+ 
  # Figure out what language subdirectories are present.
  subdirs=
  for lang in ${srcdir}/*/config-lang.in ..
*** gcc/config/i386/gas.h.ORIGINAL	Mon Mar  2 06:54:19 1998
--- gcc/config/i386/gas.h	Wed May 20 00:33:54 1998
***************
*** 85,90 ****
--- 85,103 ----
  #define ASM_OUTPUT_ALIGN(FILE,LOG) \
    if ((LOG)!=0) fprintf ((FILE), "\t.balign %d\n", 1<<(LOG))
  #endif
+ 
+ /* A C statement to output to the stdio stream FILE an assembler
+    command to advance the location counter to a multiple of 1<<LOG
+    bytes if it is within MAX_SKIP bytes.
+ 
+    This is used to align code labels according to Intel recommendations.  */
+ 
+ #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+ #  define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+      if ((LOG)!=0) \
+        if ((MAX_SKIP)==0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+        else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP))
+ #endif
  
  /* A C statement or statements which output an assembler instruction
     opcode to the stdio stream STREAM.  The macro-operand PTR is a
*** gcc/final.c.ORIGINAL	Wed Apr 22 08:57:04 1998
--- gcc/final.c	Sun May 17 18:48:49 1998
***************
*** 634,642 ****
     for each insn we'll call the alignment chain of this insn in the following
     comments.  */
  
! rtx *uid_align;
! int *uid_shuid;
! short *label_align;
  
  /* Indicate that branch shortening hasn't yet been done.  */
  
--- 634,647 ----
     for each insn we'll call the alignment chain of this insn in the following
     comments.  */
  
! struct label_alignment {
!   short alignment;
!   short max_skip;
! };
! 
! static rtx *uid_align;
! static int *uid_shuid;
! static struct label_alignment *label_align;
  
  /* Indicate that branch shortening hasn't yet been done.  */
  
***************
*** 779,792 ****
--- 784,809 ----
  #define LABEL_ALIGN(LABEL) 0
  #endif
  
+ #ifndef LABEL_ALIGN_MAX_SKIP
+ #define LABEL_ALIGN_MAX_SKIP 0
+ #endif
+ 
  #ifndef LOOP_ALIGN
  #define LOOP_ALIGN(LABEL) 0
  #endif
  
+ #ifndef LOOP_ALIGN_MAX_SKIP
+ #define LOOP_ALIGN_MAX_SKIP 0
+ #endif
+ 
  #ifndef LABEL_ALIGN_AFTER_BARRIER
  #define LABEL_ALIGN_AFTER_BARRIER(LABEL) 0
  #endif
  
+ #ifndef LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP
+ #define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP 0
+ #endif
+ 
  #ifndef ADDR_VEC_ALIGN
  int
  final_addr_vec_align (addr_vec)
***************
*** 811,817 ****
  static int min_labelno, max_labelno;
  
  #define LABEL_TO_ALIGNMENT(LABEL) \
!   (label_align[CODE_LABEL_NUMBER (LABEL) - min_labelno])
  
  /* For the benefit of port specific code do this also as a function.  */
  int
--- 828,837 ----
  static int min_labelno, max_labelno;
  
  #define LABEL_TO_ALIGNMENT(LABEL) \
!   (label_align[CODE_LABEL_NUMBER (LABEL) - min_labelno].alignment)
! 
! #define LABEL_TO_MAX_SKIP(LABEL) \
!   (label_align[CODE_LABEL_NUMBER (LABEL) - min_labelno].max_skip)
  
  /* For the benefit of port specific code do this also as a function.  */
  int
***************
*** 947,952 ****
--- 967,973 ----
    int max_uid;
    int i;
    int max_log;
+   int max_skip;
  #ifdef HAVE_ATTR_length
  #define MAX_CODE_ALIGN 16
    rtx seq;
***************
*** 986,994 ****
  
    max_labelno = max_label_num ();
    min_labelno = get_first_label_num ();
!   label_align
!     = (short*) xmalloc ((max_labelno - min_labelno + 1) * sizeof (short));
!   bzero (label_align, (max_labelno - min_labelno + 1) * sizeof (short));
  
    uid_shuid = (int *) xmalloc (max_uid * sizeof *uid_shuid);
  
--- 1007,1016 ----
  
    max_labelno = max_label_num ();
    min_labelno = get_first_label_num ();
!   label_align = (struct label_alignment *) xmalloc (
!     (max_labelno - min_labelno + 1) * sizeof (struct label_alignment));
!   bzero (label_align,
!     (max_labelno - min_labelno + 1) * sizeof (struct label_alignment));
  
    uid_shuid = (int *) xmalloc (max_uid * sizeof *uid_shuid);
  
***************
*** 998,1004 ****
       impose on the next CODE_LABEL (or the current one if we are processing
       the CODE_LABEL itself).  */
       
!   for (max_log = 0, insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
      {
        int log;
  
--- 1020,1029 ----
       impose on the next CODE_LABEL (or the current one if we are processing
       the CODE_LABEL itself).  */
       
!   max_log = 0;
!   max_skip = 0;
! 
!   for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
      {
        int log;
  
***************
*** 1017,1023 ****
  
  	  log = LABEL_ALIGN (insn);
  	  if (max_log < log)
! 	    max_log = log;
  	  next = NEXT_INSN (insn);
  /* ADDR_VECs only take room if read-only data goes into the text section.  */
  #if !defined(READONLY_DATA_SECTION) || defined(JUMP_TABLES_IN_TEXT_SECTION)
--- 1042,1051 ----
  
  	  log = LABEL_ALIGN (insn);
  	  if (max_log < log)
! 	    {
! 	      max_log = log;
! 	      max_skip = LABEL_ALIGN_MAX_SKIP;
! 	    }
  	  next = NEXT_INSN (insn);
  /* ADDR_VECs only take room if read-only data goes into the text section.  */
  #if !defined(READONLY_DATA_SECTION) || defined(JUMP_TABLES_IN_TEXT_SECTION)
***************
*** 1029,1040 ****
  		{
  		  log = ADDR_VEC_ALIGN (next);
  		  if (max_log < log)
! 		    max_log = log;
  		}
  	    }
  #endif
  	  LABEL_TO_ALIGNMENT (insn) = max_log;
  	  max_log = 0;
  	}
        else if (GET_CODE (insn) == BARRIER)
  	{
--- 1057,1073 ----
  		{
  		  log = ADDR_VEC_ALIGN (next);
  		  if (max_log < log)
! 		    {
! 		      max_log = log;
! 		      max_skip = LABEL_ALIGN_MAX_SKIP;
! 		    }
  		}
  	    }
  #endif
  	  LABEL_TO_ALIGNMENT (insn) = max_log;
+ 	  LABEL_TO_MAX_SKIP (insn) = max_skip;
  	  max_log = 0;
+ 	  max_skip = 0;
  	}
        else if (GET_CODE (insn) == BARRIER)
  	{
***************
*** 1046,1052 ****
  	      {
  		log = LABEL_ALIGN_AFTER_BARRIER (insn);
  		if (max_log < log)
! 		  max_log = log;
  		break;
  	      }
  	}
--- 1079,1088 ----
  	      {
  		log = LABEL_ALIGN_AFTER_BARRIER (insn);
  		if (max_log < log)
! 		  {
! 		    max_log = log;
! 		    max_skip = LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP;
! 		  }
  		break;
  	      }
  	}
***************
*** 1062,1068 ****
  	      {
  		log = LOOP_ALIGN (insn);
  		if (max_log < log)
! 		  max_log = log;
  		break;
  	      }
  	}
--- 1098,1107 ----
  	      {
  		log = LOOP_ALIGN (insn);
  		if (max_log < log)
! 		  {
! 		    max_log = log;
! 		    max_skip = LOOP_ALIGN_MAX_SKIP;
! 		  }
  		break;
  	      }
  	}
***************
*** 2201,2209 ****
--- 2240,2253 ----
        if (CODE_LABEL_NUMBER (insn) <= max_labelno)
  	{
  	  int align = LABEL_TO_ALIGNMENT (insn);
+ 	  int max_skip = LABEL_TO_MAX_SKIP (insn);
  
  	  if (align && NEXT_INSN (insn))
+ #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+ 	    ASM_OUTPUT_MAX_SKIP_ALIGN (file, align, max_skip);
+ #else
  	    ASM_OUTPUT_ALIGN (file, align);
+ #endif
  	}
        CC_STATUS_INIT;
        if (prescan > 0)
*** gcc/config/i386/i386.h.ORIGINAL	Sun May 17 17:13:47 1998
--- gcc/config/i386/i386.h	Sun May 17 18:58:08 1998
***************
*** 506,516 ****
--- 506,518 ----
  
  /* Align loop starts for optimal branching.  */
  #define LOOP_ALIGN(LABEL) (i386_align_loops)
+ #define LOOP_ALIGN_MAX_SKIP (i386_align_loops_string ? 0 : 7)
  
  /* This is how to align an instruction for optimal branching.
     On i486 we'll get better performance by aligning on a
     cache line (i.e. 16 byte) boundary.  */
  #define LABEL_ALIGN_AFTER_BARRIER(LABEL) (i386_align_jumps)
+ #define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP (i386_align_jumps_string ? 0 : 7)
  
  
  /* Standard register usage.  */
*** gcc/config/i386/i386.c.ORIGINAL	Mon Apr 20 03:41:41 1998
--- gcc/config/i386/i386.c	Sun May 17 18:55:51 1998
***************
*** 331,337 ****
--- 331,341 ----
  	       i386_align_loops, MAX_CODE_ALIGN);
      }
    else
+ #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+     i386_align_loops = 4;
+ #else
      i386_align_loops = 2;
+ #endif
  
    /* Validate -malign-jumps= value, or provide default.  */
    if (i386_align_jumps_string)
***************
*** 342,348 ****
--- 346,356 ----
  	       i386_align_jumps, MAX_CODE_ALIGN);
      }
    else
+ #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+     i386_align_jumps = 4;
+ #else
      i386_align_jumps = def_align;
+ #endif
  
    /* Validate -malign-functions= value, or provide default. */
    if (i386_align_funcs_string)
*** gcc/invoke.texi.ORIGINAL	Sun Apr 12 16:31:54 1998
--- gcc/invoke.texi	Sun May  3 22:27:13 1998
***************
*** 4879,4890 ****
  
  @item -malign-loops=@var{num}
  Align loops to a 2 raised to a @var{num} byte boundary.  If
! @samp{-malign-loops} is not specified, the default is 2.
  
  @item -malign-jumps=@var{num}
  Align instructions that are only jumped to to a 2 raised to a @var{num}
  byte boundary.  If @samp{-malign-jumps} is not specified, the default is
! 2 if optimizing for a 386, and 4 if optimizing for a 486.
  
  @item -malign-functions=@var{num}
  Align the start of functions to a 2 raised to @var{num} byte boundary.
--- 4879,4896 ----
  
  @item -malign-loops=@var{num}
  Align loops to a 2 raised to a @var{num} byte boundary.  If
! @samp{-malign-loops} is not specified, the default is 2 unless
! gas 2.8 (or later) is being used in which case the default is
! to align the loop on a 16 byte boundary if it is less than 8
! bytes away.
  
  @item -malign-jumps=@var{num}
  Align instructions that are only jumped to to a 2 raised to a @var{num}
  byte boundary.  If @samp{-malign-jumps} is not specified, the default is
! 2 if optimizing for a 386, and 4 if optimizing for a 486 unless
! gas 2.8 (or later) is being used in which case the default is
! to align the instruction on a 16 byte boundary if it is less
! than 8 bytes away.
  
  @item -malign-functions=@var{num}
  Align the start of functions to a 2 raised to @var{num} byte boundary.
-------------------------------------------------------------------------
|   Feith Systems  |   Voice: 1-215-646-8000  |  Email: john@feith.com  |
|    John Wehle    |     Fax: 1-215-540-5495  |                         |
-------------------------------------------------------------------------




More information about the Gcc-bugs mailing list