This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[3.4-BIB] Add i386 trunc/nearbyint patterns II
- From: Jan Hubicka <jh at suse dot cz>
- To: Jan Hubicka <jh at suse dot cz>
- Cc: gcc-patches at gcc dot gnu dot org, rth at cygnus dot com
- Date: Fri, 8 Nov 2002 11:58:02 +0100
- Subject: [3.4-BIB] Add i386 trunc/nearbyint patterns II
- References: <20021106102820.GG22059@kam.mff.cuni.cz>
>
> Hi,
> this adds the low-hanging fruit for floor and rint.
> To add the others, I need to make optimize_mode_switching compute the proper control words for me.
> What I am thinking about is the following scheme:
> 1) define an entity for each control word setting (normal, round_down, round_up, round_towards_zero, round_away_from_zero),
> each having 3 values - uninitialized, any, store
> 2) optimize the mode switching entities in order. A switch from normal to store will emit the code
> to store the cw into fixed memory and load it into a fixed pseudo;
> other switches will use the pseudo to update the control word correspondingly and store it.
>
> 3) later add a post-reload optimize_mode_switching pass that will emit the
> fldcw/fstcw, as I've seen several loops in such software where I can do that
> safely, especially in SSE mode.
>
> Does this sound like a sane plan?
And this is the updated version, now including the nearbyint patterns.
Tue Nov 5 19:18:21 PST 2002 Jan Hubicka <jh@suse.cz>
* reg-stack.c (subst_stack_regs_pat): Add UNSPEC_TRUNC and UNSPEC_NEARBYINT.
* i386.h (fp_cw_mode): Kill.
(MODE_NEEDED): Write using get_attr_fp_cw.
(EMIT_MODE_SET): Likewise.
* i386.md (UNSPEC_TRUNC, UNSPEC_NEARBYINT): New constants.
(fp_cw): New attribute.
(trunc*, nearbyint*): New patterns and splitters.
Index: reg-stack.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/reg-stack.c,v
retrieving revision 1.112.2.2
diff -c -3 -p -r1.112.2.2 reg-stack.c
*** reg-stack.c 30 Oct 2002 22:08:19 -0000 1.112.2.2
--- reg-stack.c 6 Nov 2002 10:12:36 -0000
*************** subst_stack_regs_pat (insn, regstack, pa
*** 1703,1708 ****
--- 1703,1710 ----
{
case UNSPEC_SIN:
case UNSPEC_COS:
+ case UNSPEC_TRUNC:
+ case UNSPEC_NEARBYINT:
/* These insns only operate on the top of the stack. */
src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.280.2.3
diff -c -3 -p -r1.280.2.3 i386.h
*** config/i386/i386.h 30 Oct 2002 22:09:18 -0000 1.280.2.3
--- config/i386/i386.h 6 Nov 2002 10:12:40 -0000
*************** extern rtx ix86_compare_op1; /* operand
*** 3372,3379 ****
Post-reload pass may be later used to eliminate the redundant fildcw if
needed. */
- enum fp_cw_mode {FP_CW_STORED, FP_CW_UNINITIALIZED, FP_CW_ANY};
-
/* Define this macro if the port needs extra instructions inserted
for mode switching in an optimizing compilation. */
--- 3372,3377 ----
*************** enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
*** 3395,3408 ****
in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
must be switched into prior to the execution of INSN. */
! #define MODE_NEEDED(ENTITY, I) \
! (GET_CODE (I) == CALL_INSN \
! || (GET_CODE (I) == INSN && (asm_noperands (PATTERN (I)) >= 0 \
! || GET_CODE (PATTERN (I)) == ASM_INPUT))\
! ? FP_CW_UNINITIALIZED \
! : recog_memoized (I) < 0 || get_attr_type (I) != TYPE_FISTP \
! ? FP_CW_ANY \
! : FP_CW_STORED)
/* This macro specifies the order in which modes for ENTITY are
processed. 0 is the highest priority. */
--- 3393,3400 ----
in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
must be switched into prior to the execution of INSN. */
! #define MODE_NEEDED(ENTITY, I) \
! (recog_memoized (I) >= 0 ? get_attr_fp_cw (I) : FP_CW_ANY)
/* This macro specifies the order in which modes for ENTITY are
processed. 0 is the highest priority. */
*************** enum fp_cw_mode {FP_CW_STORED, FP_CW_UNI
*** 3414,3420 ****
are to be inserted. */
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
! ((MODE) == FP_CW_STORED \
? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1), \
assign_386_stack_local (HImode, 2)), 0\
: 0)
--- 3406,3412 ----
are to be inserted. */
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
! ((MODE) == FP_CW_ROUND_DOWN \
? emit_i387_cw_initialization (assign_386_stack_local (HImode, 1), \
assign_386_stack_local (HImode, 2)), 0\
: 0)
Index: config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.380.2.6
diff -c -3 -p -r1.380.2.6 i386.md
*** config/i386/i386.md 31 Oct 2002 15:12:50 -0000 1.380.2.6
--- config/i386/i386.md 6 Nov 2002 10:12:44 -0000
***************
*** 85,90 ****
--- 85,92 ----
(UNSPEC_FSTCW 26)
(UNSPEC_ADD_CARRY 27)
(UNSPEC_FLDCW 28)
+ (UNSPEC_TRUNC 62)
+ (UNSPEC_NEARBYINT 63)
; For SSE/MMX support:
(UNSPEC_FIX 30)
***************
*** 366,372 ****
--- 368,387 ----
;; Describe a user's asm statement.
(define_asm_attributes
[(set_attr "length" "128")
+ (set_attr "fp_cw" "uninitialized")
(set_attr "type" "multi")])
+
+ ;; FP control word required by the instruction.
+ ;; UNINITIALIZED is used when the instruction can change the rounding mode, so that
+ ;; any saved copies of the control word need to be updated.
+ (define_attr "fp_cw" "round_down,uninitialized,any"
+ (cond [(eq_attr "type" "call,callv")
+ (const_string "uninitialized")
+ (eq_attr "type" "fistp")
+ (const_string "round_down")
+ ]
+ (const_string "any")))
+
(include "pentium.md")
(include "ppro.md")
***************
*** 15194,15199 ****
--- 15210,15352 ----
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+ (define_expand "truncsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "truncdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "truncxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "trunctf2"
+ [(set (match_operand:TF 0 "register_operand" "=f")
+ (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_insn_and_split "*trunc_1"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(match_operand 1 "register_operand" "0")] UNSPEC_TRUNC))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && !reload_completed && !reload_in_progress"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ {
+ operands[1] = gen_rtx_UNSPEC (GET_MODE (operands[0]),
+ gen_rtvec (1, operands[1]), UNSPEC_TRUNC);
+ operands[2] = assign_386_stack_local (HImode, 1);
+ operands[3] = assign_386_stack_local (HImode, 2);
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_insn_and_split "*trunc_1_ext"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(float_extend (match_operand 1 "register_operand" "0"))] UNSPEC_TRUNC))]
+ "TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !reload_completed && !reload_in_progress"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ {
+ operands[1] = gen_rtx_UNSPEC (GET_MODE (operands[0]),
+ gen_rtvec (1, gen_rtx_FLOAT_EXTEND (GET_MODE (operands[0]),
+ operands[1])), UNSPEC_TRUNC);
+ operands[2] = assign_386_stack_local (HImode, 1);
+ operands[3] = assign_386_stack_local (HImode, 2);
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_insn "*trunc_2"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(match_operand 1 "register_operand" "0")] UNSPEC_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ {
+ output_asm_insn ("fldcw\t%3", operands);
+ output_asm_insn ("frndint", operands);
+ output_asm_insn ("fldcw\t%2", operands);
+ return "";
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_insn "*trunc_2_ext"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(float_extend (match_operand 1 "register_operand" "0"))] UNSPEC_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0])) && FLOAT_MODE_P (GET_MODE (operands[1]))"
+ {
+ output_asm_insn ("fldcw\t%3", operands);
+ output_asm_insn ("frndint", operands);
+ output_asm_insn ("fldcw\t%2", operands);
+ return "";
+ }
+ [(set_attr "type" "fpspc")
+ (set_attr "fp_cw" "round_down")
+ (set_attr "mode" "XF")])
+
+ (define_expand "nearbyintsf2"
+ [(set (match_operand:SF 0 "register_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "nearbyintdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "nearbyintxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_expand "nearbyinttf2"
+ [(set (match_operand:TF 0 "register_operand" "=f")
+ (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
+ "")
+
+ (define_insn "*nearbyint_1"
+ [(set (match_operand 0 "register_operand" "=f")
+ (unspec [(match_operand 1 "register_operand" "0")] UNSPEC_NEARBYINT))]
+ "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ && FLOAT_MODE_P (GET_MODE (operands[0])) && FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "frndint"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")])