This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

i386 fp/int patterns tweak


Hi
This patch changes the way integer arguments of fp operations are pushed to
memory.  It was originally created for the x86-64 port, since there I am using
the red zone of the stack instead of an explicit push/use/pop sequence, but it
gave me a few ideas on how to optimize the code and it results in fewer
splitters, so I am sending this part to mainline too.

The main idea is to move the generation of the push/pop instructions out of
the splitters and into helper functions, so that the splitters are the same for
x86-64 and i386.  The optimizations I've implemented are to push SImode even in
the HImode cases (to avoid the prefix) and to not explicitly emit the pop.

This lets combine_stack_adjustments do its job and also reduces
dependencies.  If the deallocation remains in the insn stream, it will most
probably be converted to a pop or an add by the peephole2 pass.

Interestingly enough, I can actually measure the benefits in the byte benchmarks...

Honza

So lis  4 18:13:37 CET 2000  Jan Hubicka  <jh@suse.cz>

	* i386.c (ix86_force_to_memory, ix86_free_from_memory): New.
	* i386-protos.h (ix86_force_to_memory, ix86_free_from_memory): Declare.
	* i386.md (float?i?f, int/fp operations): Rewrite splitters to use
	ix86_force_to_memory and ix86_free_from_memory.

*** i386-protos.h.old	Sat Nov  4 17:29:34 2000
--- i386-protos.h	Sat Nov  4 17:29:47 2000
*************** extern int ix86_sched_reorder PARAMS ((F
*** 128,133 ****
--- 128,136 ----
  extern int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
  extern enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
  
+ extern rtx ix86_force_to_memory PARAMS ((enum machine_mode, rtx));
+ extern void ix86_free_from_memory PARAMS ((enum machine_mode));
+ 
  #ifdef TREE_CODE
  extern void init_cumulative_args PARAMS ((CUMULATIVE_ARGS *, tree, rtx));
  extern rtx function_arg PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int));
*** i386.md.old	Sat Nov  4 17:30:55 2000
--- i386.md	Sat Nov  4 17:32:02 2000
***************
*** 3682,3720 ****
  ;; %%% Kill these when reload knows how to do it.
  (define_split
    [(set (match_operand 0 "register_operand" "")
! 	(float (match_operand:HI 1 "register_operand" "")))]
    "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(set (mem:HI (pre_dec:SI (reg:SI 7))) (match_dup 1))
!    (set (match_dup 0) (match_dup 2))
!    (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 2)))]
!   "operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]),
! 			       	gen_rtx_MEM (HImode, stack_pointer_rtx));")
! 
! (define_split
!   [(set (match_operand 0 "register_operand" "")
! 	(float (match_operand:SI 1 "register_operand" "")))]
!   "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 1))
!    (set (match_dup 0) (match_dup 2))
!    (parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
! 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
!   "operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]),
! 			       	gen_rtx_MEM (SImode, stack_pointer_rtx));")
! 
! (define_split
!   [(set (match_operand 0 "register_operand" "")
! 	(float (match_operand:DI 1 "nonmemory_operand" "")))]
!   "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 2))
!    (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 1))
!    (set (match_dup 0) (match_dup 3))
!    (parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
! 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])
!    (parallel [(set (match_dup 2) (mem:SI (reg:SI 7)))
! 	      (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
!   "split_di (operands+1, 1, operands+1, operands+2);
!    operands[3] = gen_rtx_FLOAT (GET_MODE (operands[0]),
! 			       	gen_rtx_MEM (DImode, stack_pointer_rtx));")
  
  ;; Add instructions
  
--- 3682,3695 ----
  ;; %%% Kill these when reload knows how to do it.
  (define_split
    [(set (match_operand 0 "register_operand" "")
! 	(float (match_operand 1 "register_operand" "")))]
    "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(const_int 0)]
!   "operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
!    operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]);
!    emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
!    ix86_free_from_memory (GET_MODE (operands[1]));
!    DONE;")
  
  ;; Add instructions
  
***************
*** 9602,9614 ****
  	    (match_operand 2 "register_operand" "")]))]
    "TARGET_80387 && reload_completed
     && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 1))
!    (set (match_dup 0)
! 	(match_op_dup 3 [(match_dup 4) (match_dup 2)]))
!    (parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
!               (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
!   "operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]),
! 			       	gen_rtx_MEM (SImode, stack_pointer_rtx));")
  
  (define_split
    [(set (match_operand 0 "register_operand" "")
--- 9577,9592 ----
  	    (match_operand 2 "register_operand" "")]))]
    "TARGET_80387 && reload_completed
     && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(const_int 0)]
!   "operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
!    operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
!    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
! 			   gen_rtx_fmt_ee (GET_CODE (operands[3]),
! 					   GET_MODE (operands[3]),
! 					   operands[4],
! 					   operands[2])));
!    ix86_free_from_memory (GET_MODE (operands[1]));
!    DONE;")
  
  (define_split
    [(set (match_operand 0 "register_operand" "")
***************
*** 9617,9629 ****
  	    (float (match_operand:SI 2 "register_operand" ""))]))]
    "TARGET_80387 && reload_completed
     && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 2))
!    (set (match_dup 0)
! 	(match_op_dup 3 [(match_dup 1) (match_dup 4)]))
!    (parallel [(set (match_dup 2) (mem:SI (reg:SI 7)))
!               (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
!   "operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]),
! 			       	gen_rtx_MEM (SImode, stack_pointer_rtx));")
  
  ;; FPU special functions.
  
--- 9595,9610 ----
  	    (float (match_operand:SI 2 "register_operand" ""))]))]
    "TARGET_80387 && reload_completed
     && FLOAT_MODE_P (GET_MODE (operands[0]))"
!   [(const_int 0)]
!   "operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
!    operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
!    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
! 			   gen_rtx_fmt_ee (GET_CODE (operands[3]),
! 					   GET_MODE (operands[3]),
! 					   operands[1],
! 					   operands[4])));
!    ix86_free_from_memory (GET_MODE (operands[2]));
!    DONE;")
  
  ;; FPU special functions.
  
*** i386.c.old	Sat Nov  4 17:28:42 2000
--- i386.c	Sat Nov  4 18:35:46 2000
*************** ix86_expand_builtin (exp, target, subtar
*** 8320,8322 ****
--- 8320,8386 ----
    /* @@@ Should really do something sensible here.  */
    return 0;
  }
+ 
+ /* Store OPERAND to the memory after reload is completed.  This means
+    that we can't easily use assign_stack_local.  */
+ rtx
+ ix86_force_to_memory (mode, operand)
+      enum machine_mode mode;
+      rtx operand;
+ {
+   if (!reload_completed)
+     abort ();
+   switch (mode)
+     {
+       case DImode:
+ 	{
+ 	  rtx operands[2];
+ 	  split_di (&operand, 1, operands, operands+1);
+ 	  emit_insn (
+ 	    gen_rtx_SET (VOIDmode,
+ 			 gen_rtx_MEM (SImode,
+ 				      gen_rtx_PRE_DEC (Pmode,
+ 						       stack_pointer_rtx)),
+ 			 operands[1]));
+ 	  emit_insn (
+ 	    gen_rtx_SET (VOIDmode,
+ 			 gen_rtx_MEM (SImode,
+ 				      gen_rtx_PRE_DEC (Pmode,
+ 						       stack_pointer_rtx)),
+ 			 operands[0]));
+ 	}
+ 	break;
+       case HImode:
+ 	/* It is better to store HImodes as SImodes.  */
+ 	if (!TARGET_PARTIAL_REG_STALL)
+ 	  operand = gen_lowpart (SImode, operand);
+ 	/* FALLTHRU */
+       case SImode:
+ 	emit_insn (
+ 	  gen_rtx_SET (VOIDmode,
+ 		       gen_rtx_MEM (GET_MODE (operand),
+ 				    gen_rtx_PRE_DEC (SImode,
+ 						     stack_pointer_rtx)),
+ 		       operand));
+ 	break;
+       default:
+ 	abort();
+     }
+   return gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ 
+ /* Free operand from the memory.  */
+ void
+ ix86_free_from_memory (mode)
+      enum machine_mode mode;
+ {
+   /* Use LEA to deallocate stack space.  In peephole2 it will be converted
+      to pop or add instruction if registers are available.  */
+   emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ 			  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ 					GEN_INT (mode == DImode
+ 						 ? 8
+ 						 : mode == HImode && TARGET_PARTIAL_REG_STALL
+ 						 ? 2
+ 						 : 4))));
+ }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]