i386 code label alignment patch (version 3)
John Wehle
john@feith.com
Thu May 21 02:50:00 GMT 1998
[ The principal change for this version is the use of a feature
test when checking the assembler. ]
This change implements the alignment of code labels following
the recommendations of the "Intel Architecture Optimization Manual".
This change seems to be worth about a 33% speed increase in the simple
case of:
/* Benchmark quoted to show the effect of the alignment change: a single
   tight loop adds each value of i, from 0 up to 999999999, into a, and
   a is then returned from main. */
main()
{
int a;
int i;
a = 0;
/* NOTE(review): with a 32-bit int the running sum overflows long before
   the loop ends -- presumably only the loop's run time matters here,
   not the returned value. */
for (i = 0; i < 1000000000; i++)
a += i;
return a;
}
ChangeLog:
Wed May 20 23:44:28 EDT 1998 John Wehle (john@feith.com)
* acconfig.h (HAVE_GAS_MAX_SKIP_P2ALIGN): New tag.
* configure.in: Check for it.
* i386/gas.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Use it.
* final.c (uid_align, uid_shuid, label_align): Make static.
(label_align): Change type to struct label_alignment pointer.
(LABEL_TO_ALIGNMENT, shorten_branches): Update due to type change.
(LABEL_TO_MAX_SKIP): Define.
(LABEL_ALIGN_MAX_SKIP, LOOP_ALIGN_MAX_SKIP,
LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Provide defaults.
(shorten_branches): Record the maximum bytes to skip when
aligning a label.
(final_scan_insn): Use the maximum bytes to skip when aligning a label
if ASM_OUTPUT_MAX_SKIP_ALIGN is available.
* i386.h (LOOP_ALIGN_MAX_SKIP,
LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Define.
* i386.c (override_options): i386_align_jumps and i386_align_loops
default to 4 if ASM_OUTPUT_MAX_SKIP_ALIGN is available.
* invoke.texi: Document new i386 align-loops and align-jumps behavior.
Enjoy!
-- John Wehle
------------------8<------------------------8<------------------------
*** gcc/acconfig.h.ORIGINAL Sat Apr 4 12:37:22 1998
--- gcc/acconfig.h Wed May 20 00:30:42 1998
***************
*** 4,9 ****
--- 4,13 ----
/* Define if your cpp understands the stringify operator. */
#undef HAVE_CPP_STRINGIFY
+ /* Define if your assembler supports specifying the maximum number
+ of bytes to skip when using the GAS .p2align command. */
+ #undef HAVE_GAS_MAX_SKIP_P2ALIGN
+
/* Define if you have a working <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
*** gcc/configure.in.ORIGINAL Fri Apr 24 10:58:22 1998
--- gcc/configure.in Wed May 20 00:32:31 1998
***************
*** 3184,3189 ****
--- 3184,3229 ----
# fi
fi
+ # Figure out what assembler alignment features are present.
+ AC_MSG_CHECKING(assembler alignment features)
+ gcc_cv_as=
+ gcc_cv_as_alignment_features=
+ if [[ -x as$host_exeext ]]; then
+ # Build using assembler in the current directory.
+ gcc_cv_as=./as$host_exeext
+ elif [[ -f $srcdir/../gas/configure.in ]]; then
+ # Single tree build which includes gas.
+ for f in $srcdir/../gas/configure.in $srcdir/../gas/Makefile.in
+ do
+ gcc_cv_gas_version=`grep '^VERSION=[[0-9]]*\.[[0-9]]*' $f`
+ if [[ x$gcc_cv_gas_version != x ]]; then
+ break
+ fi
+ done
+ gcc_cv_gas_major_version=`expr "$gcc_cv_gas_version" : "VERSION=\([[0-9]]*\)"`
+ gcc_cv_gas_minor_version=`expr "$gcc_cv_gas_version" : "VERSION=[[0-9]]*\.\([[0-9]]*\)"`
+ # Gas version 2.8 and later support specifying the maximum
+ # bytes to skip when using .p2align.
+ if [[ "$gcc_cv_gas_major_version" -eq 2 -a "$gcc_cv_gas_minor_version" -ge 8 -o "$gcc_cv_gas_major_version" -gt 2 ]]; then
+ gcc_cv_as_alignment_features=".p2align including maximum skip"
+ AC_DEFINE(HAVE_GAS_MAX_SKIP_P2ALIGN)
+ fi
+ elif [[ x$host = x$target ]]; then
+ # Native build.
+ gcc_cv_as=as$host_exeext
+ fi
+ if [[ x$gcc_cv_as != x ]]; then
+ # Check if specifying the maximum bytes to skip when
+ # using .p2align is supported.
+ echo ".p2align 4,,7" > conftest.s
+ if $gcc_cv_as -o conftest.o conftest.s > /dev/null 2>&1; then
+ gcc_cv_as_alignment_features=".p2align including maximum skip"
+ AC_DEFINE(HAVE_GAS_MAX_SKIP_P2ALIGN)
+ fi
+ rm -f conftest.s conftest.o
+ fi
+ AC_MSG_RESULT($gcc_cv_as_alignment_features)
+
# Figure out what language subdirectories are present.
subdirs=
for lang in ${srcdir}/*/config-lang.in ..
*** gcc/config/i386/gas.h.ORIGINAL Mon Mar 2 06:54:19 1998
--- gcc/config/i386/gas.h Wed May 20 00:33:54 1998
***************
*** 85,90 ****
--- 85,103 ----
#define ASM_OUTPUT_ALIGN(FILE,LOG) \
if ((LOG)!=0) fprintf ((FILE), "\t.balign %d\n", 1<<(LOG))
#endif
+
+ /* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+ #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+ # define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ if ((LOG)!=0) \
+ if ((MAX_SKIP)==0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP))
+ #endif
/* A C statement or statements which output an assembler instruction
opcode to the stdio stream STREAM. The macro-operand PTR is a
*** gcc/final.c.ORIGINAL Wed Apr 22 08:57:04 1998
--- gcc/final.c Sun May 17 18:48:49 1998
***************
*** 634,642 ****
for each insn we'll call the alignment chain of this insn in the following
comments. */
! rtx *uid_align;
! int *uid_shuid;
! short *label_align;
/* Indicate that branch shortening hasn't yet been done. */
--- 634,647 ----
for each insn we'll call the alignment chain of this insn in the following
comments. */
! struct label_alignment {
! short alignment;
! short max_skip;
! };
!
! static rtx *uid_align;
! static int *uid_shuid;
! static struct label_alignment *label_align;
/* Indicate that branch shortening hasn't yet been done. */
***************
*** 779,792 ****
--- 784,809 ----
#define LABEL_ALIGN(LABEL) 0
#endif
+ #ifndef LABEL_ALIGN_MAX_SKIP
+ #define LABEL_ALIGN_MAX_SKIP 0
+ #endif
+
#ifndef LOOP_ALIGN
#define LOOP_ALIGN(LABEL) 0
#endif
+ #ifndef LOOP_ALIGN_MAX_SKIP
+ #define LOOP_ALIGN_MAX_SKIP 0
+ #endif
+
#ifndef LABEL_ALIGN_AFTER_BARRIER
#define LABEL_ALIGN_AFTER_BARRIER(LABEL) 0
#endif
+ #ifndef LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP
+ #define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP 0
+ #endif
+
#ifndef ADDR_VEC_ALIGN
int
final_addr_vec_align (addr_vec)
***************
*** 811,817 ****
static int min_labelno, max_labelno;
#define LABEL_TO_ALIGNMENT(LABEL) \
! (label_align[CODE_LABEL_NUMBER (LABEL) - min_labelno])
/* For the benefit of port specific code do this also as a function. */
int
--- 828,837 ----
static int min_labelno, max_labelno;
#define LABEL_TO_ALIGNMENT(LABEL) \
! (label_align[CODE_LABEL_NUMBER (LABEL) - min_labelno].alignment)
!
! #define LABEL_TO_MAX_SKIP(LABEL) \
! (label_align[CODE_LABEL_NUMBER (LABEL) - min_labelno].max_skip)
/* For the benefit of port specific code do this also as a function. */
int
***************
*** 947,952 ****
--- 967,973 ----
int max_uid;
int i;
int max_log;
+ int max_skip;
#ifdef HAVE_ATTR_length
#define MAX_CODE_ALIGN 16
rtx seq;
***************
*** 986,994 ****
max_labelno = max_label_num ();
min_labelno = get_first_label_num ();
! label_align
! = (short*) xmalloc ((max_labelno - min_labelno + 1) * sizeof (short));
! bzero (label_align, (max_labelno - min_labelno + 1) * sizeof (short));
uid_shuid = (int *) xmalloc (max_uid * sizeof *uid_shuid);
--- 1007,1016 ----
max_labelno = max_label_num ();
min_labelno = get_first_label_num ();
! label_align = (struct label_alignment *) xmalloc (
! (max_labelno - min_labelno + 1) * sizeof (struct label_alignment));
! bzero (label_align,
! (max_labelno - min_labelno + 1) * sizeof (struct label_alignment));
uid_shuid = (int *) xmalloc (max_uid * sizeof *uid_shuid);
***************
*** 998,1004 ****
impose on the next CODE_LABEL (or the current one if we are processing
the CODE_LABEL itself). */
! for (max_log = 0, insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
{
int log;
--- 1020,1029 ----
impose on the next CODE_LABEL (or the current one if we are processing
the CODE_LABEL itself). */
! max_log = 0;
! max_skip = 0;
!
! for (insn = get_insns (), i = 1; insn; insn = NEXT_INSN (insn))
{
int log;
***************
*** 1017,1023 ****
log = LABEL_ALIGN (insn);
if (max_log < log)
! max_log = log;
next = NEXT_INSN (insn);
/* ADDR_VECs only take room if read-only data goes into the text section. */
#if !defined(READONLY_DATA_SECTION) || defined(JUMP_TABLES_IN_TEXT_SECTION)
--- 1042,1051 ----
log = LABEL_ALIGN (insn);
if (max_log < log)
! {
! max_log = log;
! max_skip = LABEL_ALIGN_MAX_SKIP;
! }
next = NEXT_INSN (insn);
/* ADDR_VECs only take room if read-only data goes into the text section. */
#if !defined(READONLY_DATA_SECTION) || defined(JUMP_TABLES_IN_TEXT_SECTION)
***************
*** 1029,1040 ****
{
log = ADDR_VEC_ALIGN (next);
if (max_log < log)
! max_log = log;
}
}
#endif
LABEL_TO_ALIGNMENT (insn) = max_log;
max_log = 0;
}
else if (GET_CODE (insn) == BARRIER)
{
--- 1057,1073 ----
{
log = ADDR_VEC_ALIGN (next);
if (max_log < log)
! {
! max_log = log;
! max_skip = LABEL_ALIGN_MAX_SKIP;
! }
}
}
#endif
LABEL_TO_ALIGNMENT (insn) = max_log;
+ LABEL_TO_MAX_SKIP (insn) = max_skip;
max_log = 0;
+ max_skip = 0;
}
else if (GET_CODE (insn) == BARRIER)
{
***************
*** 1046,1052 ****
{
log = LABEL_ALIGN_AFTER_BARRIER (insn);
if (max_log < log)
! max_log = log;
break;
}
}
--- 1079,1088 ----
{
log = LABEL_ALIGN_AFTER_BARRIER (insn);
if (max_log < log)
! {
! max_log = log;
! max_skip = LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP;
! }
break;
}
}
***************
*** 1062,1068 ****
{
log = LOOP_ALIGN (insn);
if (max_log < log)
! max_log = log;
break;
}
}
--- 1098,1107 ----
{
log = LOOP_ALIGN (insn);
if (max_log < log)
! {
! max_log = log;
! max_skip = LOOP_ALIGN_MAX_SKIP;
! }
break;
}
}
***************
*** 2201,2209 ****
--- 2240,2253 ----
if (CODE_LABEL_NUMBER (insn) <= max_labelno)
{
int align = LABEL_TO_ALIGNMENT (insn);
+ int max_skip = LABEL_TO_MAX_SKIP (insn);
if (align && NEXT_INSN (insn))
+ #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+ ASM_OUTPUT_MAX_SKIP_ALIGN (file, align, max_skip);
+ #else
ASM_OUTPUT_ALIGN (file, align);
+ #endif
}
CC_STATUS_INIT;
if (prescan > 0)
*** gcc/config/i386/i386.h.ORIGINAL Sun May 17 17:13:47 1998
--- gcc/config/i386/i386.h Sun May 17 18:58:08 1998
***************
*** 506,516 ****
--- 506,518 ----
/* Align loop starts for optimal branching. */
#define LOOP_ALIGN(LABEL) (i386_align_loops)
+ #define LOOP_ALIGN_MAX_SKIP (i386_align_loops_string ? 0 : 7)
/* This is how to align an instruction for optimal branching.
On i486 we'll get better performance by aligning on a
cache line (i.e. 16 byte) boundary. */
#define LABEL_ALIGN_AFTER_BARRIER(LABEL) (i386_align_jumps)
+ #define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP (i386_align_jumps_string ? 0 : 7)
/* Standard register usage. */
*** gcc/config/i386/i386.c.ORIGINAL Mon Apr 20 03:41:41 1998
--- gcc/config/i386/i386.c Sun May 17 18:55:51 1998
***************
*** 331,337 ****
--- 331,341 ----
i386_align_loops, MAX_CODE_ALIGN);
}
else
+ #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+ i386_align_loops = 4;
+ #else
i386_align_loops = 2;
+ #endif
/* Validate -malign-jumps= value, or provide default. */
if (i386_align_jumps_string)
***************
*** 342,348 ****
--- 346,356 ----
i386_align_jumps, MAX_CODE_ALIGN);
}
else
+ #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+ i386_align_jumps = 4;
+ #else
i386_align_jumps = def_align;
+ #endif
/* Validate -malign-functions= value, or provide default. */
if (i386_align_funcs_string)
*** gcc/invoke.texi.ORIGINAL Sun Apr 12 16:31:54 1998
--- gcc/invoke.texi Sun May 3 22:27:13 1998
***************
*** 4879,4890 ****
@item -malign-loops=@var{num}
Align loops to a 2 raised to a @var{num} byte boundary. If
! @samp{-malign-loops} is not specified, the default is 2.
@item -malign-jumps=@var{num}
Align instructions that are only jumped to to a 2 raised to a @var{num}
byte boundary. If @samp{-malign-jumps} is not specified, the default is
! 2 if optimizing for a 386, and 4 if optimizing for a 486.
@item -malign-functions=@var{num}
Align the start of functions to a 2 raised to @var{num} byte boundary.
--- 4879,4896 ----
@item -malign-loops=@var{num}
Align loops to a 2 raised to a @var{num} byte boundary. If
! @samp{-malign-loops} is not specified, the default is 2 unless
! gas 2.8 (or later) is being used in which case the default is
! to align the loop on a 16 byte boundary if it is less than 8
! bytes away.
@item -malign-jumps=@var{num}
Align instructions that are only jumped to to a 2 raised to a @var{num}
byte boundary. If @samp{-malign-jumps} is not specified, the default is
! 2 if optimizing for a 386, and 4 if optimizing for a 486 unless
! gas 2.8 (or later) is being used in which case the default is
! to align the instruction on a 16 byte boundary if it is less
! than 8 bytes away.
@item -malign-functions=@var{num}
Align the start of functions to a 2 raised to @var{num} byte boundary.
-------------------------------------------------------------------------
| Feith Systems | Voice: 1-215-646-8000 | Email: john@feith.com |
| John Wehle | Fax: 1-215-540-5495 | |
-------------------------------------------------------------------------
More information about the Gcc-bugs
mailing list