This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[3.4-BIB] Add i386 trunc/rint patterns
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org, rth at cygnus dot com
- Date: Wed, 6 Nov 2002 11:28:20 +0100
- Subject: [3.4-BIB] Add i386 trunc/rint patterns
Hi,
this adds the low hanging fruits for floor and rint.
To add the others, I need to make optimize_mode_switching compute the proper control words for me.
What I am thinking about is the following scheme:
1) define an entity for each control word setting (normal, round_down, round_up, round_towards_zero, round_away_from_zero),
each having 3 values - uninitialized, any, store
2) optimize the mode switching entities in order. A switch from normal to store will emit the code
to store the cw into fixed memory and load it into a fixed pseudo;
other switches will use the pseudo to update the control word correspondingly and store it.
3) later add a post-reload optimize_mode_switching pass that will emit the
fldcw/fstcw, as I've seen several loops in such software where I can do that
safely, especially in SSE mode.
Does this sound like a sane plan?
Honza
Tue Nov 5 19:18:21 PST 2002 Jan Hubicka <jh@suse.cz>
* reg-stack.c (subst_stack_regs_pat): Add UNSPEC_TRUNC and UNSPEC_RINT.
* i386.h (fp_cw_mode): Kill.
(MODE_NEEDED): Rewrite using get_attr_fp_cw.
(EMIT_MODE_SET): Likewise.
* i386.md (UNSPEC_TRUNC, UNSPEC_RINT): New constants.
(fp_cw): New attribute.
(trunc*, rint*): New patterns and splitters.
Index: reg-stack.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/reg-stack.c,v
retrieving revision 1.112.2.2
diff -c -3 -p -r1.112.2.2 reg-stack.c
*** reg-stack.c 30 Oct 2002 22:08:19 -0000 1.112.2.2
--- reg-stack.c 6 Nov 2002 10:12:36 -0000
*************** subst_stack_regs_pat (insn, regstack, pa
*** 1703,1708 ****
--- 1703,1710 ----
{
case UNSPEC_SIN:
case UNSPEC_COS:
+ case UNSPEC_TRUNC:
+ case UNSPEC_RINT:
/* These insns only operate on the top of the stack. */
src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.280.2.3
diff -c -3 -p -r1.280.2.3 i386.h
*** config/i386/i386.h 30 Oct 2002 22:09:18 -0000 1.280.2.3
--- config/i386/i386.h 6 Nov 2002 10:12:40 -0000
*************** extern rtx ix86_compare_op1; /* operand
*** 3372,3379 ****
Post-reload pass may be later used to eliminate the redundant fildcw if
needed. */
- enum fp_cw_mode {FP_CW_STORED, FP_CW_UNINITIALIZED, FP_CW_ANY};
-
/* Define this macro if the port needs extra instructions inserted
for mode switching in an optimizing compilation. */
--- 3372,3377 ----
*************** enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
*** 3395,3408 ****
in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
must be switched into prior to the execution of INSN. */
! #define MODE_NEEDED(ENTITY, I) \
! (GET_CODE (I) == CALL_INSN \
! || (GET_CODE (I) == INSN && (asm_noperands (PATTERN (I)) >= 0 \
! || GET_CODE (PATTERN (I)) == ASM_INPUT))\
! ? FP_CW_UNINITIALIZED \
! : recog_memoized (I) < 0 || get_attr_type (I) != TYPE_FISTP \
! ? FP_CW_ANY \
! : FP_CW_STORED)
/* This macro specifies the order in which modes for ENTITY are
processed. 0 is the highest priority. */
--- 3393,3400 ----
in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
must be switched into prior to the execution of INSN. */
! #define MODE_NEEDED(ENTITY, I) \
! (recog_memoized (I) >= 0 ? get_attr_fp_cw (I) : FP_CW_ANY)
/* This macro specifies the order in which modes for ENTITY are
processed. 0 is the highest priority. */
*************** enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
*** 3414,3420 ****
are to be inserted. */
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
! ((MODE) == FP_CW_STORED \
? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1), \
assign_386_stack_local (HImode, 2)), 0\
: 0)
--- 3406,3412 ----
are to be inserted. */
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
! ((MODE) == FP_CW_ROUND_DOWN \
? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1), \
assign_386_stack_local (HImode, 2)), 0\
: 0)
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.380.2.6
diff -c -3 -p -r1.380.2.6 i386.md
*** config/i386/i386.md 31 Oct 2002 15:12:50 -0000 1.380.2.6
--- config/i386/i386.md 6 Nov 2002 10:12:44 -0000
***************
*** 85,90 ****
--- 85,92 ----
(UNSPEC_FSTCW 26)
(UNSPEC_ADD_CARRY 27)
(UNSPEC_FLDCW 28)
+ (UNSPEC_TRUNC 62)
+ (UNSPEC_RINT 63)
; For SSE/MMX support:
(UNSPEC_FIX 30)
***************
*** 366,372 ****
--- 368,387 ----
;; Describe a user's asm statement.
(define_asm_attributes
[(set_attr "length" "128")
+ (set_attr "fp_cw" "uninitialized")
(set_attr "type" "multi")])
+
+ ;; FP control word required by the instruction.
+ ;; UNINITIALIZED is used in case the instruction can change the rounding mode, so
+ ;; that saved copies of the control word need to be updated.
+ (define_attr "fp_cw" "round_down,uninitialized,any"
+ (cond [(eq_attr "type" "call,callv")
+ (const_string "uninitialized")
+ (eq_attr "type" "fistp")
+ (const_string "round_down")
+ ]
+ (const_string "any")))
+
(include "pentium.md")
(include "ppro.md")
***************
*** 15194,15199 ****
--- 15210,15352 ----
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+ (define_expand "truncsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "truncdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "truncxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "trunctf2"
+ [(set (match_operand:TF 0 "register_operand" "=f")
+ (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_insn_and_split "*trunc_1"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(match_operand 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && !reload_completed && !reload_in_progress"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ {
+ operands[1] = gen_rtx_UNSPEC (GET_MODE (operands[0]),
+ gen_rtvec (1, operands[1]), UNSPEC_TRUNC);
+ operands[2] = assign_386_stack_local (HImode, 1);
+ operands[3] = assign_386_stack_local (HImode, 2);
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_insn_and_split "*trunc_1_ext"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(float_extend (match_operand 1 "register_operand" "0"))] UNSPEC_TRUNC))]
+ "TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !reload_completed && !reload_in_progress"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ {
+ operands[1] = gen_rtx_UNSPEC (GET_MODE (operands[0]),
+ gen_rtvec (1, gen_rtx_FLOAT_EXTEND (GET_MODE (operands[0]),
+ operands[1])), UNSPEC_TRUNC);
+ operands[2] = assign_386_stack_local (HImode, 1);
+ operands[3] = assign_386_stack_local (HImode, 2);
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_insn "*trunc_2"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(match_operand 1 "register_operand" "0")] UNSPEC_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ {
+ output_asm_insn ("fldcw\t%3", operands);
+ output_asm_insn ("frndint", operands);
+ output_asm_insn ("fldcw\t%2", operands);
+ return "";
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_insn "*trunc_2_ext"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(float_extend (match_operand 1 "register_operand" "0"))] UNSPEC_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0])) && FLOAT_MODE_P (GET_MODE (operands[1]))"
+ {
+ output_asm_insn ("fldcw\t%3", operands);
+ output_asm_insn ("frndint", operands);
+ output_asm_insn ("fldcw\t%2", operands);
+ return "";
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_expand "rintsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_RINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "rintdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_RINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "rintxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_RINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "rinttf2"
+ [(set (match_operand:TF 0 "register_operand" "=f")
+ (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_RINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_insn "*rint_1"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(match_operand 1 "register_operand" "0")] UNSPEC_RINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0])) && FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "frndint"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")])