From 9199f050fa24815234deccf9c562eb8266a39eb2 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 15 Mar 2005 15:44:09 +0100 Subject: [PATCH] re PR target/18668 (use prescott's fisttp) PR target/18668 * config/i386/i386.h (x86_fisttp): New. (TARGET_FISTTP): New macro. * config/i386/i386.c (x86_fisttp): Set for NOCONA. (output_fix_trunc): Add fisttp parameter. Generate fisttp x87 instruction when fisttp flag is set. * config/i386/i386-protos.h (output_fix_trunc): Change declaration. * config/i386/i386.md (type attribute): Add fisttp. (unit attribute): Set to i387 for fisttp type. (X87MODEF, X87MODEI, SSEMODEF, SSEMODEI24): New mode macros. (fix_truncxfdi2, fix_truncxfsi2): Generate fisttp patterns for TARGET_FISTTP. (fix_truncdfdi2, fix_truncsfdi2, fix_truncdfsi2, fix_truncsfsi2): Generate fisttp patterns for TARGET_FISTTP. Implement using mode macros. (fix_truncxfhi2, fix_truncdfhi2, fix_truncsfhi2): Generate fisttp patterns for TARGET_FISTTP. Enable patterns for (TARGET_FISTTP && !TARGET_SSE_MATH). Implement using mode macros. (fix_trunc<mode>_i387_fisttp_1, fix_trunc<mode>_i387_fisttp, fix_trunc<mode>_i387_fisttp_with_temp): New instruction patterns to implement fisttp x87 insn. (fix_trunc*_i387_fisttp splitters): New patterns. (*fix_truncdi_i387, *fix_truncsi_i387, *fix_trunchi_i387): Rename to *fix_trunc<mode>_i387_1. Implement using mode macros. Disable patterns for TARGET_FISTTP. Add comment about FLAGS_REG clobber. (fix_truncdi_memory, fix_truncdi_nomemory, fix_trunchi_nomemory): Rename to fix_trunc<mode>_i387 and fix_trunc<mode>_i387_with_temp. Implement using mode macros. Disable patterns for TARGET_FISTTP. (fix_truncsi_memory, fix_truncsi_nomemory, fix_trunchi_memory, fix_trunchi_nomemory): Rename to fix_trunc<mode>_i387 and fix_trunc<mode>_i387_with_temp. Implement using mode macros. Disable patterns for TARGET_FISTTP. (fix_trunc*_i387 splitters): Implement using mode macros. 
(fix_truncdfdi_sse, fix_truncsfdi_sse, fix_truncdfsi_sse, fix_truncsfsi_sse): Disable for (TARGET_FISTTP && !TARGET_SSE_MATH). (fix_trunc*_sse peephole2s): Implement using mode macros. From-SVN: r96477 --- gcc/ChangeLog | 41 +++ gcc/config/i386/i386-protos.h | 2 +- gcc/config/i386/i386.c | 20 +- gcc/config/i386/i386.h | 3 +- gcc/config/i386/i386.md | 577 +++++++++++++++------------------- 5 files changed, 317 insertions(+), 326 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 48e144d72f7..b4489a48013 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,44 @@ +2005-03-15 Uros Bizjak + + PR target/18668 + * config/i386/i386.h (x86_fisttp): New. + (TARGET_FISTTP): New macro. + * config/i386/i386.c (x86_fisttp): Set for NOCONA. + (output_fix_trunc): Add fisttp parameter. Generate fisttp x87 + instruction when fisttp flag is set. + * config/i386/i386-protos.h (output_fix_trunc): Change declaration. + + * config/i386/i386.md (type attribute): Add fisttp. + (unit attribute): Set to i387 for fisttp type. + (X87MODEF, X87MODEI, SSEMODEF, SSEMODEI24): New mode macros. + (fix_truncxfdi2, fix_truncxfsi2): Generate fisttp patterns for + TARGET_FISTTP. + (fix_truncdfdi2, fix_truncsfdi2, fix_truncdfsi2, fix_truncsfsi2): + Generate fisttp patterns for TARGET_FISTTP. Implement using mode + macros. + (fix_truncxfhi2, fix_truncdfhi2, fix_truncsfhi2): Generate fisttp + patterns for TARGET_FISTTP. Enable patterns for + (TARGET_FISTTP && !TARGET_SSE_MATH). Implement using mode macros. + (fix_trunc_i387_fisttp_1, fix_trunc_i387_fisttp, + fix_trunc_i387_fisttp_with_temp): New instruction patterns to + implement fisttp x87 insn. + (fix_trunc*_i387_fisttp splitters): New patterns. + (*fix_truncdi_i387, *fix_truncsi_i387, *fix_trunchi_i387): + Rename to *fix_trunc_i387_1. Implement using mode macros. + Disable patterns for TARGET_FISTTP. Add comment about FLAGS_REG + clobber. 
+ (fix_truncdi_memory, fix_truncdi_nomemory, fix_trunchi_nomemory): + Rename to fix_trunc_i387 and fix_trunc_i387_with_temp. + Implement using mode macros. Disable patterns for TARGET_FISTTP. + (fix_truncsi_memory, fix_truncsi_nomemory, fix_trunchi_memory, + fix_trunchi_nomemory): Rename to fix_trunc_i387 and + fix_trunc_i387_with_temp. Implement using mode macros. + Disable patterns for TARGET_FISTTP. + (fix_trunc*_i387 splitters): Implement usign mode macros. + (fix_truncdfdi_sse, fix_truncsfdi_sse, fix_truncdfsi_sse, + fix_truncsfsi_sse): Disable for (TARGET_FISTTP && !TARGET_SSE_MATH). + (fix_trunx*_sse peephole2s): Implement using mode macros. + 2005-03-15 J"orn Rennecke PR rtl-optimization/20291 diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 53edfd16104..bf693860da7 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -117,7 +117,7 @@ extern void split_ti (rtx[], int, rtx[], rtx[]); extern const char *output_set_got (rtx); extern const char *output_387_binary_op (rtx, rtx*); extern const char *output_387_reg_move (rtx, rtx*); -extern const char *output_fix_trunc (rtx, rtx*); +extern const char *output_fix_trunc (rtx, rtx*, int); extern const char *output_fp_compare (rtx, rtx*, int, int); extern void i386_output_dwarf_dtprel (FILE*, int, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index bc311aaf23a..f76812f335f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -525,6 +525,7 @@ const int x86_double_with_add = ~m_386; const int x86_use_bit_test = m_386; const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6; const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA; +const int x86_fisttp = m_NOCONA; const int x86_3dnow_a = m_ATHLON_K8; const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA; /* Branch hints were put in P4 based on simulation result. 
But @@ -7282,7 +7283,7 @@ emit_i387_cw_initialization (rtx current_mode, rtx new_mode, int mode) operand may be [SDX]Fmode. */ const char * -output_fix_trunc (rtx insn, rtx *operands) +output_fix_trunc (rtx insn, rtx *operands, int fisttp) { int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; int dimode_p = GET_MODE (operands[0]) == DImode; @@ -7290,7 +7291,7 @@ output_fix_trunc (rtx insn, rtx *operands) /* Jump through a hoop or two for DImode, since the hardware has no non-popping instruction. We used to do this a different way, but that was somewhat fragile and broke with post-reload splitters. */ - if (dimode_p && !stack_top_dies) + if ((dimode_p || fisttp) && !stack_top_dies) output_asm_insn ("fld\t%y1", operands); if (!STACK_TOP_P (operands[1])) @@ -7299,12 +7300,17 @@ output_fix_trunc (rtx insn, rtx *operands) if (GET_CODE (operands[0]) != MEM) abort (); - output_asm_insn ("fldcw\t%3", operands); - if (stack_top_dies || dimode_p) - output_asm_insn ("fistp%z0\t%0", operands); + if (fisttp) + output_asm_insn ("fisttp%z0\t%0", operands); else - output_asm_insn ("fist%z0\t%0", operands); - output_asm_insn ("fldcw\t%2", operands); + { + output_asm_insn ("fldcw\t%3", operands); + if (stack_top_dies || dimode_p) + output_asm_insn ("fistp%z0\t%0", operands); + else + output_asm_insn ("fist%z0\t%0", operands); + output_asm_insn ("fldcw\t%2", operands); + } return ""; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d5b8c82b9c9..765eff8b9fd 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -229,7 +229,7 @@ extern int target_flags; #define TUNEMASK (1 << ix86_tune) extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and; -extern const int x86_use_bit_test, x86_cmove, x86_deep_branch; +extern const int x86_use_bit_test, x86_cmove, x86_fisttp, x86_deep_branch; extern const int x86_branch_hints, x86_unroll_strlen; extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx; extern 
const int x86_use_loop, x86_use_himode_fiop, x86_use_simode_fiop; @@ -258,6 +258,7 @@ extern int x86_prefetch_sse; /* For sane SSE instruction set generation we need fcomi instruction. It is safe to enable all CMOVE instructions. */ #define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE) +#define TARGET_FISTTP (x86_fisttp & (1 << ix86_arch)) #define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & TUNEMASK) #define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & TUNEMASK) #define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & TUNEMASK) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 61904db4b58..e168d112933 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -194,7 +194,7 @@ icmp,test,ibr,setcc,icmov, push,pop,call,callv,leave, str,cld, - fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, sselog,sselog1,sseiadd,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" @@ -207,7 +207,7 @@ ;; The CPU unit operations uses. (define_attr "unit" "integer,i387,sse,mmx,unknown" - (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint") + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") (const_string "i387") (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv") @@ -442,8 +442,21 @@ [(set_attr "length" "128") (set_attr "type" "multi")]) +;; All x87 floating point modes +(define_mode_macro X87MODEF [SF DF XF]) + +;; All integer modes handled by x87 fisttp operator. +(define_mode_macro X87MODEI [HI SI DI]) + ;; All integer modes handled by integer x87 operators. (define_mode_macro X87MODEI12 [HI SI]) + +;; All SSE floating point modes +(define_mode_macro SSEMODEF [SF DF]) + +;; All integer modes handled by SSE cvtts?2si* operators. 
+(define_mode_macro SSEMODEI24 [SI DI]) + ;; Scheduling descriptions @@ -3998,8 +4011,6 @@ [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] "") -;; %%% Break up all these bad boys. - ;; Signed conversion to DImode. (define_expand "fix_truncxfdi2" @@ -4007,361 +4018,234 @@ (fix:DI (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_80387" - "") - -(define_expand "fix_truncdfdi2" - [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || (TARGET_64BIT && TARGET_SSE2)" { - if (TARGET_64BIT && TARGET_SSE2) + if (TARGET_FISTTP) { - rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); - emit_insn (gen_fix_truncdfdi_sse (out, operands[1])); - if (out != operands[0]) - emit_move_insn (operands[0], out); + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); DONE; } }) -(define_expand "fix_truncsfdi2" +(define_expand "fix_truncdi2" [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || (TARGET_64BIT && TARGET_SSE)" + (fix:DI (match_operand:SSEMODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (mode))" { - if (TARGET_64BIT && TARGET_SSE) + if (TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (TARGET_64BIT && SSE_FLOAT_MODE_P (mode)) { rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); - emit_insn (gen_fix_truncsfdi_sse (out, operands[1])); + emit_insn (gen_fix_truncdi_sse (out, operands[1])); if (out != operands[0]) emit_move_insn (operands[0], out); DONE; } }) -;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. 
-(define_insn_and_split "*fix_truncdi_i387" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (fix:DI (match_operand 1 "register_operand" "f,f"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" - "#" - "&& 1" - [(const_int 0)] -{ - ix86_optimize_mode_switching = 1; - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_truncdi_memory (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (DImode, 0); - emit_insn (gen_fix_truncdi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); - } - DONE; -} - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "DI")]) - -(define_insn "fix_truncdi_nomemory" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") - (fix:DI (match_operand 1 "register_operand" "f,f"))) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "=m,m")) - (clobber (match_scratch:DF 5 "=&1f,&1f"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "DI")]) - -(define_insn "fix_truncdi_memory" - [(set (match_operand:DI 0 "memory_operand" "=m") - (fix:DI (match_operand 1 "register_operand" "f"))) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m")) - (clobber (match_scratch:DF 4 "=&1f"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" - "* return output_fix_trunc (insn, operands);" - [(set_attr 
"type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "DI")]) - -(define_split - [(set (match_operand:DI 0 "register_operand" "") - (fix:DI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:DI 4 "memory_operand" "")) - (clobber (match_scratch 5 ""))] - "reload_completed" - [(parallel [(set (match_dup 4) (fix:DI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 5))]) - (set (match_dup 0) (match_dup 4))] - "") - -(define_split - [(set (match_operand:DI 0 "memory_operand" "") - (fix:DI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:DI 4 "memory_operand" "")) - (clobber (match_scratch 5 ""))] - "reload_completed" - [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 5))])] - "") - -;; When SSE available, it is always faster to use it! -(define_insn "fix_truncsfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_64BIT && TARGET_SSE" - "cvttss2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector")]) - -;; Avoid vector decoded form of the instruction. 
-(define_peephole2 - [(match_scratch:SF 2 "x") - (set (match_operand:DI 0 "register_operand" "") - (fix:DI (match_operand:SF 1 "memory_operand" "")))] - "TARGET_K8 && !optimize_size" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:DI (match_dup 2)))] - "") - -(define_insn "fix_truncdfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] - "TARGET_64BIT && TARGET_SSE2" - "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt,sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector")]) - -;; Avoid vector decoded form of the instruction. -(define_peephole2 - [(match_scratch:DF 2 "Y") - (set (match_operand:DI 0 "register_operand" "") - (fix:DI (match_operand:DF 1 "memory_operand" "")))] - "TARGET_K8 && !optimize_size" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:DI (match_dup 2)))] - "") - ;; Signed conversion to SImode. (define_expand "fix_truncxfsi2" [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:XF 1 "register_operand" ""))) + (fix:SI (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_80387" - "") - -(define_expand "fix_truncdfsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || TARGET_SSE2" { - if (TARGET_SSE2) + if (TARGET_FISTTP) { - rtx out = REG_P (operands[0]) ? 
operands[0] : gen_reg_rtx (SImode); - emit_insn (gen_fix_truncdfsi_sse (out, operands[1])); - if (out != operands[0]) - emit_move_insn (operands[0], out); + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); DONE; } }) -(define_expand "fix_truncsfsi2" +(define_expand "fix_truncsi2" [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:SF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 || TARGET_SSE" + (fix:SI (match_operand:SSEMODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode))" { - if (TARGET_SSE) + if (TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } + if (SSE_FLOAT_MODE_P (mode)) { rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); - emit_insn (gen_fix_truncsfsi_sse (out, operands[1])); + emit_insn (gen_fix_truncsi_sse (out, operands[1])); if (out != operands[0]) emit_move_insn (operands[0], out); DONE; } }) -;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. -(define_insn_and_split "*fix_truncsi_i387" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") - (fix:SI (match_operand 1 "register_operand" "f,f"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - "&& 1" - [(const_int 0)] +;; Signed conversion to HImode. 
+ +(define_expand "fix_trunchi2" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:X87MODEF 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_80387 + && !(SSE_FLOAT_MODE_P (mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))" { - ix86_optimize_mode_switching = 1; - operands[2] = assign_386_stack_local (HImode, 1); - operands[3] = assign_386_stack_local (HImode, 2); - if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_truncsi_memory (operands[0], operands[1], - operands[2], operands[3])); - else - { - operands[4] = assign_386_stack_local (SImode, 0); - emit_insn (gen_fix_truncsi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); - } - DONE; -} - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "SI")]) + if (TARGET_FISTTP) + { + emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1])); + DONE; + } +}) -(define_insn "fix_truncsi_nomemory" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") - (fix:SI (match_operand 1 "register_operand" "f,f"))) - (use (match_operand:HI 2 "memory_operand" "m,m")) - (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SI 4 "memory_operand" "=m,m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "#" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "SI")]) +;; When SSE is available, it is always faster to use it! 
+(define_insn "fix_truncsfdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] + "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttss2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double,vector")]) -(define_insn "fix_truncsi_memory" - [(set (match_operand:SI 0 "memory_operand" "=m") - (fix:SI (match_operand 1 "register_operand" "f"))) - (use (match_operand:HI 2 "memory_operand" "m")) - (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp") - (set_attr "i387_cw" "trunc") - (set_attr "mode" "SI")]) +(define_insn "fix_truncdfdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] + "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvttsd2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) -;; When SSE available, it is always faster to use it! (define_insn "fix_truncsfsi_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_SSE" + "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" "cvttss2si\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "DF") (set_attr "athlon_decode" "double,vector")]) -;; Avoid vector decoded form of the instruction. 
-(define_peephole2 - [(match_scratch:SF 2 "x") - (set (match_operand:SI 0 "register_operand" "") - (fix:SI (match_operand:SF 1 "memory_operand" "")))] - "TARGET_K8 && !optimize_size" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SI (match_dup 2)))] - "") - (define_insn "fix_truncdfsi_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] - "TARGET_SSE2" + "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" "cvttsd2si\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "DF") (set_attr "athlon_decode" "double,vector")]) -;; Avoid vector decoded form of the instruction. +;; Avoid vector decoded forms of the instruction. (define_peephole2 [(match_scratch:DF 2 "Y") - (set (match_operand:SI 0 "register_operand" "") - (fix:SI (match_operand:DF 1 "memory_operand" "")))] + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] "TARGET_K8 && !optimize_size" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SI (match_dup 2)))] + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] "") -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (fix:SI (match_operand 1 "register_operand" ""))) - (use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:SI 4 "memory_operand" ""))] - "reload_completed" - [(parallel [(set (match_dup 4) (fix:SI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3))]) - (set (match_dup 0) (match_dup 4))] +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] + "TARGET_K8 && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] "") -(define_split - [(set (match_operand:SI 0 "memory_operand" "") - (fix:SI (match_operand 1 "register_operand" ""))) - 
(use (match_operand:HI 2 "memory_operand" "")) - (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:SI 4 "memory_operand" ""))] - "reload_completed" - [(parallel [(set (match_dup 0) (fix:SI (match_dup 1))) - (use (match_dup 2)) - (use (match_dup 3))])] - "") +(define_insn_and_split "fix_trunc_fisttp_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))] + "TARGET_80387 && TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc_i387_fisttp (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (mode, 0); + emit_insn (gen_fix_trunc_i387_fisttp_with_temp (operands[0], + operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) -;; Signed conversion to HImode. 
+(define_insn "fix_trunc_i387_fisttp" + [(set (match_operand:X87MODEI 0 "memory_operand" "=m") + (fix:X87MODEI (match_operand 1 "register_operand" "f"))) + (clobber (match_scratch:XF 2 "=&1f"))] + "TARGET_80387 && TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "* return output_fix_trunc (insn, operands, 1);" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) -(define_expand "fix_truncxfhi2" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:XF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387" - "") +(define_insn "fix_trunc_i387_fisttp_with_temp" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "TARGET_80387 && TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "#" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) -(define_expand "fix_truncdfhi2" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:DF 1 "register_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_80387 && !TARGET_SSE2" +(define_split + [(set (match_operand:X87MODEI 0 "register_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))] "") -(define_expand "fix_truncsfhi2" - [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:SF 1 "register_operand" ""))) - (clobber 
(reg:CC FLAGS_REG))])] - "TARGET_80387 && !TARGET_SSE" +(define_split + [(set (match_operand:X87MODEI 0 "memory_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))])] "") ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description -;; of the machinery. -(define_insn_and_split "*fix_trunchi_i387" - [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") - (fix:HI (match_operand 1 "register_operand" "f,f"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !reload_completed && !reload_in_progress - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" +;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control +;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG +;; clobbering insns can be used. Look at emit_i387_cw_initialization () +;; function in i386.c. 
+(define_insn_and_split "*fix_trunc_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && !(reload_completed || reload_in_progress)" "#" "&& 1" [(const_int 0)] @@ -4370,70 +4254,129 @@ operands[2] = assign_386_stack_local (HImode, 1); operands[3] = assign_386_stack_local (HImode, 2); if (memory_operand (operands[0], VOIDmode)) - emit_insn (gen_fix_trunchi_memory (operands[0], operands[1], - operands[2], operands[3])); + emit_insn (gen_fix_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); else { - operands[4] = assign_386_stack_local (HImode, 0); - emit_insn (gen_fix_trunchi_nomemory (operands[0], operands[1], - operands[2], operands[3], - operands[4])); + operands[4] = assign_386_stack_local (mode, 0); + emit_insn (gen_fix_trunc_i387_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); } DONE; } [(set_attr "type" "fistp") (set_attr "i387_cw" "trunc") - (set_attr "mode" "HI")]) + (set_attr "mode" "")]) + +(define_insn "fix_truncdi_i387" + [(set (match_operand:DI 0 "memory_operand" "=m") + (fix:DI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) -(define_insn "fix_trunchi_nomemory" - [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") - (fix:HI (match_operand 1 "register_operand" "f,f"))) +(define_insn 
"fix_truncdi_i387_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (fix:DI (match_operand 1 "register_operand" "f,f"))) (use (match_operand:HI 2 "memory_operand" "m,m")) (use (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:HI 4 "memory_operand" "=m,m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) - && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" "#" [(set_attr "type" "fistp") (set_attr "i387_cw" "trunc") - (set_attr "mode" "HI")]) + (set_attr "mode" "DI")]) -(define_insn "fix_trunchi_memory" - [(set (match_operand:HI 0 "memory_operand" "=m") - (fix:HI (match_operand 1 "register_operand" "f"))) +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fix_trunc_i387" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (fix:X87MODEI12 (match_operand 1 
"register_operand" "f"))) (use (match_operand:HI 2 "memory_operand" "m")) (use (match_operand:HI 3 "memory_operand" "m"))] - "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" - "* return output_fix_trunc (insn, operands);" + "* return output_fix_trunc (insn, operands, 0);" [(set_attr "type" "fistp") (set_attr "i387_cw" "trunc") - (set_attr "mode" "HI")]) + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))] + "TARGET_80387 && !TARGET_FISTTP + && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) (define_split - [(set (match_operand:HI 0 "memory_operand" "") - (fix:HI (match_operand 1 "register_operand" ""))) + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) (use (match_operand:HI 2 "memory_operand" "")) (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:HI 4 "memory_operand" ""))] + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] "reload_completed" - [(parallel [(set (match_dup 0) (fix:HI (match_dup 1))) + [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1))) (use (match_dup 2)) - (use (match_dup 3))])] + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] "") (define_split - [(set (match_operand:HI 0 "register_operand" "") - (fix:HI (match_operand 1 "register_operand" ""))) + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" 
""))) (use (match_operand:HI 2 "memory_operand" "")) (use (match_operand:HI 3 "memory_operand" "")) - (clobber (match_operand:HI 4 "memory_operand" ""))] + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] "reload_completed" - [(parallel [(set (match_dup 4) (fix:HI (match_dup 1))) + [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1))) (use (match_dup 2)) - (use (match_dup 3)) - (clobber (match_dup 4))]) - (set (match_dup 0) (match_dup 4))] + (use (match_dup 3))])] "") (define_insn "x86_fnstcw_1" -- 2.43.5