This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[3.4-BIB] Add i386 trunc/nearbyint patterns II


> 
> Hi,
> this adds the low-hanging fruit for floor and rint.
> To add the others, I need to make optimize_mode_switching compute proper control words for me.
> What I am thinking about is the following scheme:
> 1) define an entity for each control word setting (normal, round_down, round_up, round_towards_zero, round_away_from_zero),
>    each having 3 values - uninitialized, any, store
> 2) optimize mode switching entities in order.  A switch from normal to store will emit the code
>    to store the cw into fixed memory and load it into a fixed pseudo;
>    other switches will use the pseudo to update the control word correspondingly and store it.
> 
> 3) later add a post-reload optimize_mode_switching pass that will emit the
>    fldcw/fstcw as I've seen several loops in such software where I can do that
>    safely.  Especially in SSE mode.
> 
> Does this sound like a sane plan?

And this is the updated version, now with the nearbyint patterns

Tue Nov  5 19:18:21 PST 2002  Jan Hubicka  <jh@suse.cz>
	* reg-stack.c (subst_stack_regs_pat): Add UNSPEC_TRUNC and UNSPEC_NEARBYINT.
	* i386.h (fp_cw_mode): Kill.
	(MODE_NEEDED): Write using get_attr_fp_cw.
	(EMIT_MODE_SET): Likewise.
	* i386.md (UNSPEC_TRUNC, UNSPEC_NEARBYINT): New constants.
	(fp_cw): New attribute.
	(trunc*, nearbyint*): New patterns and splitters.

Index: reg-stack.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/reg-stack.c,v
retrieving revision 1.112.2.2
diff -c -3 -p -r1.112.2.2 reg-stack.c
*** reg-stack.c	30 Oct 2002 22:08:19 -0000	1.112.2.2
--- reg-stack.c	6 Nov 2002 10:12:36 -0000
*************** subst_stack_regs_pat (insn, regstack, pa
*** 1703,1708 ****
--- 1703,1710 ----
  	      {
  	      case UNSPEC_SIN:
  	      case UNSPEC_COS:
+ 	      case UNSPEC_TRUNC:
+ 	      case UNSPEC_NEARBYINT:
  		/* These insns only operate on the top of the stack.  */
  
  		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.280.2.3
diff -c -3 -p -r1.280.2.3 i386.h
*** config/i386/i386.h	30 Oct 2002 22:09:18 -0000	1.280.2.3
--- config/i386/i386.h	6 Nov 2002 10:12:40 -0000
*************** extern rtx ix86_compare_op1;	/* operand 
*** 3372,3379 ****
     Post-reload pass may be later used to eliminate the redundant fildcw if
     needed.  */
  
- enum fp_cw_mode {FP_CW_STORED, FP_CW_UNINITIALIZED, FP_CW_ANY};
- 
  /* Define this macro if the port needs extra instructions inserted
     for mode switching in an optimizing compilation.  */
  
--- 3372,3377 ----
*************** enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
*** 3395,3408 ****
     in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
     must be switched into prior to the execution of INSN.  */
  
! #define MODE_NEEDED(ENTITY, I)						\
!   (GET_CODE (I) == CALL_INSN						\
!    || (GET_CODE (I) == INSN && (asm_noperands (PATTERN (I)) >= 0 	\
! 				|| GET_CODE (PATTERN (I)) == ASM_INPUT))\
!    ? FP_CW_UNINITIALIZED						\
!    : recog_memoized (I) < 0 || get_attr_type (I) != TYPE_FISTP		\
!    ? FP_CW_ANY								\
!    : FP_CW_STORED)
  
  /* This macro specifies the order in which modes for ENTITY are
     processed.  0 is the highest priority.  */
--- 3393,3400 ----
     in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
     must be switched into prior to the execution of INSN.  */
  
! #define MODE_NEEDED(ENTITY, I) \
! 	(recog_memoized (I) >= 0 ? get_attr_fp_cw (I) : FP_CW_ANY)
  
  /* This macro specifies the order in which modes for ENTITY are
     processed.  0 is the highest priority.  */
*************** enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
*** 3414,3420 ****
     are to be inserted.  */
  
  #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) 			\
!   ((MODE) == FP_CW_STORED						\
     ? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1),	\
  				  assign_386_stack_local (HImode, 2)), 0\
     : 0)
--- 3406,3412 ----
     are to be inserted.  */
  
  #define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) 			\
!   ((MODE) == FP_CW_ROUND_DOWN						\
     ? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1),	\
  				  assign_386_stack_local (HImode, 2)), 0\
     : 0)
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.380.2.6
diff -c -3 -p -r1.380.2.6 i386.md
*** config/i386/i386.md	31 Oct 2002 15:12:50 -0000	1.380.2.6
--- config/i386/i386.md	6 Nov 2002 10:12:44 -0000
***************
*** 85,90 ****
--- 85,92 ----
     (UNSPEC_FSTCW		26)
     (UNSPEC_ADD_CARRY		27)
     (UNSPEC_FLDCW		28)
+    (UNSPEC_TRUNC		62)
+    (UNSPEC_NEARBYINT			63)
  
     ; For SSE/MMX support:
     (UNSPEC_FIX			30)
***************
*** 366,372 ****
--- 368,387 ----
  ;; Describe a user's asm statement.
  (define_asm_attributes
    [(set_attr "length" "128")
+    (set_attr "fp_cw" "uninitialized")
     (set_attr "type" "multi")])
+ 
+ ;; FP control word required by the instruction.
+ ;; UNINITIALIZED is used in case the instruction can change the rounding mode and saved
+ ;; copies of it need to be updated
+ (define_attr "fp_cw" "round_down,uninitialized,any"
+   (cond [(eq_attr "type" "call,callv")
+ 	   (const_string "uninitialized")
+ 	 (eq_attr "type" "fistp")
+ 	   (const_string "round_down")
+ 	]
+ 	(const_string "any")))
+   
  
  (include "pentium.md")
  (include "ppro.md")
***************
*** 15194,15199 ****
--- 15210,15352 ----
    "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 
     && flag_unsafe_math_optimizations"
    "fcos"
+   [(set_attr "type" "fpspc")
+    (set_attr "mode" "XF")])
+ 
+ (define_expand "truncsf2"
+   [(set (match_operand:SF 0 "register_operand" "=f")
+ 	(unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_expand "truncdf2"
+   [(set (match_operand:DF 0 "register_operand" "=f")
+ 	(unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_expand "truncxf2"
+   [(set (match_operand:XF 0 "register_operand" "=f")
+ 	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_expand "trunctf2"
+   [(set (match_operand:TF 0 "register_operand" "=f")
+ 	(unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_insn_and_split "*trunc_1"
+   [(set (match_operand 0 "register_operand" "=f")
+ 	(unspec [(match_operand 1 "register_operand" "0")] UNSPEC_TRUNC))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+    && FLOAT_MODE_P (GET_MODE (operands[0]))
+    && GET_MODE (operands[0]) == GET_MODE (operands[1])
+    && !reload_completed && !reload_in_progress"
+   "#"
+   "&& 1"
+   [(parallel [(set (match_dup 0) (match_dup 1))
+ 	      (use (match_dup 2))
+ 	      (use (match_dup 3))])]
+ {
+   operands[1] = gen_rtx_UNSPEC (GET_MODE (operands[0]),
+ 				gen_rtvec (1, operands[1]), UNSPEC_TRUNC);
+   operands[2] = assign_386_stack_local (HImode, 1);
+   operands[3] = assign_386_stack_local (HImode, 2);
+ }
+   [(set_attr "type" "fpspc")
+    (set_attr "fp_cw" "round_down")
+    (set_attr "mode" "XF")])
+ 
+ (define_insn_and_split "*trunc_1_ext"
+   [(set (match_operand 0 "register_operand" "=f")
+ 	(unspec [(float_extend (match_operand 1 "register_operand" "0"))] UNSPEC_TRUNC))]
+   "TARGET_80387
+    && FLOAT_MODE_P (GET_MODE (operands[0]))
+    && FLOAT_MODE_P (GET_MODE (operands[1]))
+    && !reload_completed && !reload_in_progress"
+   "#"
+   "&& 1"
+   [(parallel [(set (match_dup 0) (match_dup 1))
+ 	      (use (match_dup 2))
+ 	      (use (match_dup 3))])]
+ {
+   operands[1] = gen_rtx_UNSPEC (GET_MODE (operands[0]),
+     gen_rtvec (1, gen_rtx_FLOAT_EXTEND (GET_MODE (operands[0]),
+ 					operands[1])), UNSPEC_TRUNC);
+   operands[2] = assign_386_stack_local (HImode, 1);
+   operands[3] = assign_386_stack_local (HImode, 2);
+ }
+   [(set_attr "type" "fpspc")
+    (set_attr "fp_cw" "round_down")
+    (set_attr "mode" "XF")])
+ 
+ (define_insn "*trunc_2"
+   [(set (match_operand 0 "register_operand" "=f")
+ 	(unspec [(match_operand 1 "register_operand" "0")] UNSPEC_TRUNC))
+    (use (match_operand:HI 2 "memory_operand" "m"))
+    (use (match_operand:HI 3 "memory_operand" "m"))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+    && FLOAT_MODE_P (GET_MODE (operands[0]))
+    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ {
+   output_asm_insn ("fldcw\t%3", operands);
+   output_asm_insn ("frndint", operands);
+   output_asm_insn ("fldcw\t%2", operands);
+   return "";
+ }
+   [(set_attr "type" "fpspc")
+    (set_attr "fp_cw" "round_down")
+    (set_attr "mode" "XF")])
+ 
+ (define_insn "*trunc_2_ext"
+   [(set (match_operand 0 "register_operand" "=f")
+ 	(unspec [(float_extend (match_operand 1 "register_operand" "0"))] UNSPEC_TRUNC))
+    (use (match_operand:HI 2 "memory_operand" "m"))
+    (use (match_operand:HI 3 "memory_operand" "m"))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+    && FLOAT_MODE_P (GET_MODE (operands[0])) && FLOAT_MODE_P (GET_MODE (operands[1]))"
+ {
+   output_asm_insn ("fldcw\t%3", operands);
+   output_asm_insn ("frndint", operands);
+   output_asm_insn ("fldcw\t%2", operands);
+   return "";
+ }
+   [(set_attr "type" "fpspc")
+    (set_attr "fp_cw" "round_down")
+    (set_attr "mode" "XF")])
+ 
+ (define_expand "nearbyintsf2"
+   [(set (match_operand:SF 0 "register_operand" "=f")
+ 	(unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_expand "nearbyintdf2"
+   [(set (match_operand:DF 0 "register_operand" "=f")
+ 	(unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_expand "nearbyintxf2"
+   [(set (match_operand:XF 0 "register_operand" "=f")
+ 	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_expand "nearbyinttf2"
+   [(set (match_operand:TF 0 "register_operand" "=f")
+ 	(unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+   "")
+ 
+ (define_insn "*nearbyint_1"
+   [(set (match_operand 0 "register_operand" "=f")
+ 	(unspec [(match_operand 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+   "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+    && FLOAT_MODE_P (GET_MODE (operands[0])) && FLOAT_MODE_P (GET_MODE (operands[1]))"
+   "frndint"
    [(set_attr "type" "fpspc")
     (set_attr "mode" "XF")])
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]