K6 alignment patch 1 version 2

Jan Hubicka hubicka@atrey.karlin.mff.cuni.cz
Mon Jul 19 23:09:00 GMT 1999


Tue Jul 20 02:10:42 EDT 1999  Jan Hubicka  <hubicka@freesoft.cz>
	* i386.c (K6_ALIGN): New constant.
	(ix86_align_loops_max_skip): New global variable.
	(ix86_align_jumps_max_skip): Likewise.
	(ix86_align_max_skip): Likewise.
	(ix86_decode_align_string): New function.
	(override_options): Use it, change max_skip handling.
	(ix86_adjust_align): New function.
	* i386.h (LOOP_ALIGN): Use ix86_adjust_align.
	(LOOP_ALIGN_MAX_SKIP): Use ix86_align_max_skip.
	(LABEL_ALIGN_AFTER_BARRIER): Use ix86_adjust_align.
	(LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Use ix86_align_max_skip.
	(ix86_align_max_skip): Declare.
	(ix86_align_loops_max_skip): Declare.
	(ix86_align_jumps_max_skip): Declare.
	* invoke.texi (-malign-loops): Document new behaviour.
	(-malign-jumps): Likewise.

*** /root/i386/i386.c	Sat Jul 10 11:06:55 1999
--- i386.c	Mon Jul 19 22:56:51 1999
*************** int ix86_regparm;
*** 185,190 ****
--- 189,200 ----
  
  /* Alignment to use for loops and jumps:  */
  
+ /* Comes from final.c -- no real reason to change it.  */
+ #define MAX_CODE_ALIGN 16
+ 
+ /* Following values have special meaning for alignment variables: */
+ #define K6_ALIGN  (MAX_CODE_ALIGN + 1)       /* Use K6 alignment algorithm. */
+ 
  /* Power of two alignment for loops. */
  const char *ix86_align_loops_string;
  
*************** const char *ix86_branch_cost_string;
*** 197,209 ****
--- 207,229 ----
  
  /* Power of two alignment for functions. */
  int ix86_align_funcs;
+ 
  const char *ix86_align_funcs_string;
  
  /* Power of two alignment for loops. */
  int ix86_align_loops;
  
+ /* Max skip value for loops. */
+ int ix86_align_loops_max_skip;
+ 
  /* Power of two alignment for non-loop jumps. */
  int ix86_align_jumps;
+ 
+ /* Max skip value for non-loop jumps. */
+ int ix86_align_jumps_max_skip;
+ 
+ /* max skip value for last adjusted label alignment.  */
+ int ix86_align_max_skip;
  
  static void output_pic_addr_const PROTO ((FILE *, rtx, int));
  static void put_condition_code PROTO ((enum rtx_code, enum machine_mode,
*************** struct ix86_address
*** 233,238 ****
--- 255,297 ----
  
  static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
  
+ /* Decode STRING, the argument of the alignment option NAME.  It is either a
+    single integer (power of two alignment, stored to *ALIGN) or two integers
+    separated by ',' where the second is the maximal skip (stored to *MAX_SKIP,
+    0 when omitted).  The first integer can be replaced by the string 'k6' to
+    enable the AMD-K6 alignment algorithm.  Malformed input is a fatal error.  */
+ static void
+ ix86_decode_align_string (string, name, align, max_skip)
+      const char *string;
+      const char *name;
+      int *align;
+      int *max_skip;
+ {
+   const char *wholestring = string;
+   int limit = 32;		/* K6 mode aligns to 32 byte cache lines.  */
+   if (!strncmp (string, "k6", 2))
+     *align = K6_ALIGN, string += 2;
+   else
+     {
+       *align = atoi (string);
+       if (*align < 0 || *align > MAX_CODE_ALIGN)
+ 	fatal ("%s=%s alignment is not between 0 and %d",
+ 	       name, wholestring, MAX_CODE_ALIGN);
+       /* *ALIGN is validated above, so the shift cannot overflow.  */
+       limit = 1 << *align;
+       while (*string >= '0' && *string <= '9')
+ 	string++;
+     }
+   *max_skip = 0;		/* Value used when no ",SKIP" part follows.  */
+   if (!*string)
+     return;
+   if (*string != ',')
+     fatal ("%s=%s bad format.", name, wholestring);
+   string++;
+   *max_skip = atoi (string);
+   if (*max_skip < 0 || *max_skip > limit)
+     fatal ("%s=%s max skip is not between 0 and %d", name, wholestring, limit);
+ }
  /* Sometimes certain combinations of command options do not make
     sense on a particular target machine.  You can define a macro
     `OVERRIDE_OPTIONS' to take account of this.  This macro, if
*************** static int ix86_decompose_address PARAMS
*** 245,252 ****
  void
  override_options ()
  {
-   /* Comes from final.c -- no real reason to change it.  */
- #define MAX_CODE_ALIGN 16
  
    static struct ptt
      {
--- 304,309 ----
*************** override_options ()
*** 254,270 ****
        int target_enable;		/* Target flags to enable.  */
        int target_disable;		/* Target flags to disable.  */
        int align_loop;			/* Default alignments.  */
        int align_jump;
        int align_func;
        int branch_cost;
      }
    const processor_target_table[PROCESSOR_max] = 
      {
!       {&i386_cost, 0, 0, 2, 2, 2, 1},
!       {&i486_cost, 0, 0, 4, 4, 4, 1},
!       {&pentium_cost, 0, 0, -4, -4, -4, 1},
!       {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
!       {&k6_cost, 0, 0, -4, -4, -4, 1}
      };
  
    static struct pta
--- 311,330 ----
        int target_enable;		/* Target flags to enable.  */
        int target_disable;		/* Target flags to disable.  */
        int align_loop;			/* Default alignments.  */
+       int align_loop_max_skip;
        int align_jump;
+       int align_jump_max_skip;
        int align_func;
+       int align_func_max_skip;		/* ??? currently ignored.  */
        int branch_cost;
      }
    const processor_target_table[PROCESSOR_max] = 
      {
!       {&i386_cost, 0, 0, 2, 0, 2, 0, 2, 0, 1},
!       {&i486_cost, 0, 0, 4, 0, 4, 0, 4, 0, 1},
!       {&pentium_cost, 0, 0, 4, 7, 4, 7, 4, 7, 1},
!       {&pentiumpro_cost, 0, 0, 4, 0, 4, 7, 4, 0, 1},
!       {&k6_cost, 0, 0, K6_ALIGN, 0, K6_ALIGN, 5, 5, 15, 1}
      };
  
    static struct pta
*************** override_options ()
*** 366,387 ****
  
    /* Validate -malign-loops= value, or provide default.  */
    ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
    if (ix86_align_loops_string)
      {
!       ix86_align_loops = atoi (ix86_align_loops_string);
!       if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
! 	fatal ("-malign-loops=%d is not between 0 and %d",
! 	       ix86_align_loops, MAX_CODE_ALIGN);
      }
  
    /* Validate -malign-jumps= value, or provide default.  */
    ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
    if (ix86_align_jumps_string)
      {
!       ix86_align_jumps = atoi (ix86_align_jumps_string);
!       if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
! 	fatal ("-malign-jumps=%d is not between 0 and %d",
! 	       ix86_align_jumps, MAX_CODE_ALIGN);
      }
  
    /* Validate -malign-functions= value, or provide default. */
--- 426,445 ----
  
    /* Validate -malign-loops= value, or provide default.  */
    ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
+   ix86_align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    if (ix86_align_loops_string)
      {
!       ix86_decode_align_string (ix86_align_loops_string, "-malign-loops", 
! 		                &ix86_align_loops, &ix86_align_loops_max_skip);
      }
  
    /* Validate -malign-jumps= value, or provide default.  */
    ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
+   ix86_align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    if (ix86_align_jumps_string)
      {
!       ix86_decode_align_string (ix86_align_jumps_string, "-malign-jumps", 
! 		                &ix86_align_jumps, &ix86_align_jumps_max_skip);
      }
  
    /* Validate -malign-functions= value, or provide default. */
*************** override_options ()
*** 418,423 ****
--- 476,542 ----
      flag_branch_on_count_reg = 1;
  }
  
+ /* Return alignment (as a power of two) for LABEL, where the default alignment
+    is ALIGN and the default max skip is MAX_SKIP.  Set the ix86_align_max_skip
+    variable as well.  We take special care for K6 alignment here, because K6
+    doesn't like unnecessary code bloat caused by alignment.  So we only ensure
+    that branch targets are aligned in a way that the next decode pair plus the
+    necessary predecode information is on the same (32 byte) cache line.  */
+ int
+ ix86_adjust_align (align, max_skip, label)
+      int align;
+      int max_skip;
+      rtx label;
+ {
+   if (align == K6_ALIGN)
+     {
+ #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+       int length = 0;
+ 
+       do
+ 	label = next_nonnote_insn (label);
+       while (label && GET_RTX_CLASS (GET_CODE (label)) != 'i');
+ 
+       if (label)
+ 	{
+ 	  if (recog_memoized (label) < 0)
+ 	    goto unknown;
+ 	  length += get_attr_length (label);
+ 
+ 	  /* Attempt to find instruction to pair; there may be none left.  */
+ 	  if (ix86_safe_uops (label) < UOPS_MANY)
+ 	    {
+ 	      do
+ 		label = next_nonnote_insn (label);
+ 	      while (label && GET_RTX_CLASS (GET_CODE (label)) != 'i');
+ 	      if (!label || recog_memoized (label) < 0)
+ 		goto unknown;
+ 	      /* Add length in any case (even when instructions do not pair),
+ 	         because second decoder needs this information anyway (to figure
+ 	         out that this instruction is unpairable).  */
+ 	      length += get_attr_length (label);
+ 	    }
+ 	}
+       /* Add MAX_SKIP, clamping the sum to the 32 byte cache line size.  */
+       if (length + max_skip < 32)
+ 	length += max_skip;
+       else if (length > 32)
+ 	goto unknown;
+       else
+ 	length = 32;
+       ix86_align_max_skip = length;
+       return 5;
+     unknown:
+       ix86_align_max_skip = 7;
+       return 5;
+ #endif /* ASM_OUTPUT_MAX_SKIP_ALIGN */
+       ix86_align_max_skip = 0;
+       return 0;
+     }
+   ix86_align_max_skip = max_skip;
+   return align;
+ }
+ 
  /* A C statement (sans semicolon) to choose the order in which to
     allocate hard registers for pseudo-registers local to a basic
     block.
*** /root/i386/i386.h	Sat Jul 10 11:07:24 1999
--- i386.h	Sat Jul 17 19:24:38 1999
*************** extern int ix86_arch;
*** 542,558 ****
  /* Required on the 386 since it doesn't have bitfield insns.  */
  #define PCC_BITFIELD_TYPE_MATTERS 1
  
  /* Align loop starts for optimal branching.  */
  #define LOOP_ALIGN(LABEL) \
! 	(ix86_align_loops < 0 ? -ix86_align_loops : ix86_align_loops)
! #define LOOP_ALIGN_MAX_SKIP \
! 	(ix86_align_loops < -3 ? (1<<(-ix86_align_loops-1))-1 : 0)
  
  /* This is how to align an instruction for optimal branching.  */
  #define LABEL_ALIGN_AFTER_BARRIER(LABEL) \
! 	(ix86_align_jumps < 0 ? -ix86_align_jumps : ix86_align_jumps)
! #define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP \
! 	(ix86_align_jumps < -3 ? (1<<(-ix86_align_jumps-1))-1 : 0)
  
  /* Standard register usage.  */
  
--- 547,564 ----
  /* Required on the 386 since it doesn't have bitfield insns.  */
  #define PCC_BITFIELD_TYPE_MATTERS 1
  
+ /* Max skip value for last adjusted label.  */
+ extern int ix86_align_max_skip;
+ 
  /* Align loop starts for optimal branching.  */
  #define LOOP_ALIGN(LABEL) \
! 	ix86_adjust_align (ix86_align_loops, ix86_align_loops_max_skip, (LABEL))
! #define LOOP_ALIGN_MAX_SKIP ix86_align_max_skip
  
  /* This is how to align an instruction for optimal branching.  */
  #define LABEL_ALIGN_AFTER_BARRIER(LABEL) \
! 	ix86_adjust_align (ix86_align_jumps, ix86_align_jumps_max_skip, (LABEL))
! #define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP ix86_align_max_skip
  
  /* Standard register usage.  */
  
*************** extern int ix86_attr_length_default XPAR
*** 2485,2493 ****
  extern int ix86_issue_rate XPARAMS((void));
  extern int ix86_adjust_cost XPARAMS((xrtx, xrtx, xrtx, int));
  extern void ix86_sched_init XPARAMS((FILE *, int));
! extern void ix86_sched_reorder XPARAMS((FILE *, int, xrtx *, int));
  extern int ix86_variable_issue XPARAMS((FILE *, int, xrtx, int));
  
  
  #undef XPARAMS
  #undef xrtx
--- 2501,2512 ----
  extern int ix86_issue_rate XPARAMS((void));
  extern int ix86_adjust_cost XPARAMS((xrtx, xrtx, xrtx, int));
  extern void ix86_sched_init XPARAMS((FILE *, int));
! extern void ix86_sched_reorder XPARAMS((FILE *, int, xrtx *, int, int));
  extern int ix86_variable_issue XPARAMS((FILE *, int, xrtx, int));
  
+ extern void ix86_insn_alignment XPARAMS((xrtx, int *, int *));
+ extern int ix86_adjust_align XPARAMS((int, int, xrtx));
+ 
  
  #undef XPARAMS
  #undef xrtx
*************** extern const char *ix86_align_funcs_stri
*** 2506,2512 ****
--- 2525,2533 ----
  extern const char *ix86_branch_cost_string;	/* values 1-5: see jump.c */
  extern int ix86_regparm;			/* ix86_regparm_string as a number */
  extern int ix86_align_loops;			/* power of two alignment for loops */
+ extern int ix86_align_loops_max_skip;		/* max skip value for loops */
  extern int ix86_align_jumps;			/* power of two alignment for non-loop jumps */
+ extern int ix86_align_jumps_max_skip;		/* max skip value for jumps */
  extern int ix86_align_funcs;			/* power of two alignment for functions */
  extern int ix86_branch_cost;			/* values 1-5: see jump.c */
  extern const char * const hi_reg_name[];	/* names for 16 bit regs */
*** invoke.texi.old	Sat Jul 17 17:57:06 1999
--- invoke.texi	Sat Jul 17 17:57:36 1999
*************** function by using the function attribute
*** 5178,5193 ****
  value, including any libraries.  This includes the system libraries and
  startup modules.
  
! @item -malign-loops=@var{num}
  Align loops to a 2 raised to a @var{num} byte boundary.  If
! @samp{-malign-loops} is not specified, the default is 2 unless
  gas 2.8 (or later) is being used in which case the default is
  to align the loop on a 16 byte boundary if it is less than 8
  bytes away.
  
! @item -malign-jumps=@var{num}
! Align instructions that are only jumped to to a 2 raised to a @var{num}
! byte boundary.  If @samp{-malign-jumps} is not specified, the default is
  2 if optimizing for a 386, and 4 if optimizing for a 486 unless
  gas 2.8 (or later) is being used in which case the default is
  to align the instruction on a 16 byte boundary if it is less
--- 5178,5203 ----
  value, including any libraries.  This includes the system libraries and
  startup modules.
  
! @item -malign-loops=@var{num} | @var{num},@var{num2}
  Align loops to a 2 raised to a @var{num} byte boundary.  If
! @var{num2} is specified, align only if the boundary is less than @var{num2}
! bytes away.  (This feature works only with gas 2.0 or later.)
! 
! GCC contains special code for AMD-K6 alignment.  The branch targets
! are aligned two instructions away from the cache line boundary (32 bytes).
! To enable this algorithm, use the string @code{k6} as the @var{num} parameter.
! The @var{num2} value is then added to the calculated value.  This is useful
! for keeping cache fills efficient.
! 
! If @samp{-malign-loops} is not specified, the default is 2 unless
  gas 2.8 (or later) is being used in which case the default is
  to align the loop on a 16 byte boundary if it is less than 8
  bytes away.
  
! @item -malign-jumps=@var{num} | @var{num},@var{num2}
! Control alignment of instructions that are only jumped to.  See @code{-malign-loops} above for a discussion of the format.
! 
! If @samp{-malign-jumps} is not specified, the default is
  2 if optimizing for a 386, and 4 if optimizing for a 486 unless
  gas 2.8 (or later) is being used in which case the default is
  to align the instruction on a 16 byte boundary if it is less


More information about the Gcc-patches mailing list