Fix -march=athlon-xp/pentium4 regression

Jan Hubicka jh@suse.cz
Wed Feb 6 08:54:00 GMT 2002


Hi,
when -msse is on, we attempt to use SSE for FP operations, but most
operations are not available.  Since our regclass pass is not able
to propagate the information, it believes that register A can be in
SSE because it is used only for operations available in SSE, even
when it is used together with register B that requires x87.  This
results in lousy code.  The attached patch works around it by changing
the regalloc order depending on SSE_MATH.

Wed Feb  6 16:07:41 CET 2002  Jan Hubicka  <jh@suse.cz>

	* i386-protos.h (x86_order_regs_for_local_alloc): Declare.
	* i386.c (x86_order_regs_for_local_alloc): New global function.
	* i386.h (REG_ALLOC_ORDER): Cleanup.
	(ORDER_REGS_FOR_LOCAL_ALLOC): New.

Index: gcc/config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386-protos.h,v
retrieving revision 1.67
diff -c -3 -p -r1.67 i386-protos.h
*** i386-protos.h	2002/01/12 10:05:25	1.67
--- i386-protos.h	2002/02/06 15:01:17
*************** extern int ix86_memory_move_cost PARAMS 
*** 169,174 ****
--- 169,175 ----
  extern void ix86_set_move_mem_attrs PARAMS ((rtx, rtx, rtx, rtx, rtx));
  extern void emit_i387_cw_initialization PARAMS ((rtx, rtx));
  extern bool ix86_fp_jump_nontrivial_p PARAMS ((enum rtx_code));
+ extern void x86_order_regs_for_local_alloc PARAMS ((void));
  
  
  #ifdef TREE_CODE
Index: gcc/config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.359
diff -c -3 -p -r1.359 i386.c
*** i386.c	2002/01/21 15:53:30	1.359
--- i386.c	2002/02/06 15:01:21
*************** ix86_svr3_asm_out_constructor (symbol, p
*** 12312,12314 ****
--- 12312,12358 ----
    fputc ('\n', asm_out_file);
  }
  #endif
+ 
+ /* Fill reg_alloc_order; x87 vs. SSE priority follows TARGET_SSE_MATH.  */
+ 
+ void
+ x86_order_regs_for_local_alloc ()
+ {
+    int pos = 0;
+    int i;
+ 
+    /* First the general purpose registers set in call_used_regs.  */
+    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+      if (GENERAL_REGNO_P (i) && call_used_regs[i])
+ 	reg_alloc_order [pos++] = i;
+ 
+    /* Then the general purpose registers not set in call_used_regs.  */
+    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
+ 	reg_alloc_order [pos++] = i;
+ 
+    /* x87 registers go ahead of the SSE ones when we are not doing FP
+       math in SSE (!TARGET_SSE_MATH).  */
+    if (!TARGET_SSE_MATH)
+      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+        reg_alloc_order [pos++] = i;
+    
+    /* SSE registers, followed by the REX SSE registers.  */
+    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+      reg_alloc_order [pos++] = i;
+    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+      reg_alloc_order [pos++] = i;
+ 
+    /* x87 registers, after the SSE ones when TARGET_SSE_MATH is set.  */
+    if (TARGET_SSE_MATH)
+      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+        reg_alloc_order [pos++] = i;
+ 
+    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
+      reg_alloc_order [pos++] = i;
+ 
+    /* Zero-fill the rest of the array, since some registers are never
+       placed in the order above.  */
+    while (pos < FIRST_PSEUDO_REGISTER)
+      reg_alloc_order [pos++] = 0;
+ }
Index: gcc/config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.241
diff -c -3 -p -r1.241 i386.h
*** i386.h	2002/02/02 00:05:11	1.241
--- i386.h	2002/02/06 15:01:23
*************** extern int ix86_arch;
*** 926,961 ****
     registers listed in CALL_USED_REGISTERS, keeping the others
     available for storage of persistent values.
  
!    Three different versions of REG_ALLOC_ORDER have been tried:
  
!    If the order is edx, ecx, eax, ... it produces a slightly faster compiler,
!    but slower code on simple functions returning values in eax.
  
!    If the order is eax, ecx, edx, ... it causes reload to abort when compiling
!    perl 4.036 due to not being able to create a DImode register (to hold a 2
!    word union).
  
!    If the order is eax, edx, ecx, ... it produces better code for simple
!    functions, and a slightly slower compiler.  Users complained about the code
!    generated by allocating edx first, so restore the 'natural' order of things.  */
  
- #define REG_ALLOC_ORDER 					\
- /*ax,dx,cx,*/							\
- {  0, 1, 2,							\
- /* bx,si,di,bp,sp,*/						\
-    3, 4, 5, 6, 7,						\
- /*r8,r9,r10,r11,*/						\
-   37,38, 39, 40,						\
- /*r12,r15,r14,r13*/						\
-   41, 44, 43, 42,						\
- /*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/			\
-     21,  22,  23,  24,  25,  26,  27,  28,			\
- /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/		\
-     45,  46,   47,   48,   49,   50,   51,   52,		\
- /*st,st1,st2,st3,st4,st5,st6,st7*/				\
-    8,  9, 10, 11, 12, 13, 14, 15,				\
- /*,arg,cc,fpsr,dir,frame*/					\
-      16,17, 18, 19,   20,					\
- /*mmx0,mmx1,mmx2,mmx3,mmx4,mmx5,mmx6,mmx7*/			\
-     29,  30,  31,  32,  33,  34,  35,  36 }
  
  /* Macro to conditionally modify fixed_regs/call_used_regs.  */
  #define CONDITIONAL_REGISTER_USAGE					\
--- 926,948 ----
     registers listed in CALL_USED_REGISTERS, keeping the others
     available for storage of persistent values.
  
!    ORDER_REGS_FOR_LOCAL_ALLOC actually overwrites the order,
!    so this is just a placeholder initializer for the array.  */
  
! #define REG_ALLOC_ORDER 					\
! {  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\
!    18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,	\
!    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,  \
!    48, 49, 50, 51, 52 }
  
! /* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
!    to be rearranged based on a particular function.  When using SSE math,
!    we want to allocate SSE registers before x87 ones, and vice versa.  */
  
! #define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
  
  
  /* Macro to conditionally modify fixed_regs/call_used_regs.  */
  #define CONDITIONAL_REGISTER_USAGE					\



More information about the Gcc-patches mailing list