This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
ia64 inline integer division
- To: gcc-patches at gcc dot gnu dot org
- Subject: ia64 inline integer division
- From: Richard Henderson <rth at cygnus dot com>
- Date: Mon, 11 Sep 2000 14:18:55 -0700
This is an experiment to see how much difference can be had
performance-wise if we inline division instead of deferring
to library routines.
There are two switches,
-minline-divide-min-latency
-minline-divide-max-throughput
which choose different implementations. I have a feeling
that for most code this will just bloat things, so the
default is still library routines.
r~
* config/ia64/ia64-protos.h (fr_nonimmediate_operand): Declare.
* config/ia64/ia64.c (fr_nonimmediate_operand): New.
(ia64_override_options): Prevent optimizing division for both
latency and throughput.
(rtx_needs_barrier): Handle frcpa.
* config/ia64/ia64.h (MASK_INLINE_DIV_LAT): New.
(MASK_INLINE_DIV_THR, TARGET_INLINE_DIV_LAT): New.
(TARGET_INLINE_DIV_THR, TARGET_INLINE_DIV): New.
(TARGET_SWITCHES): Add -minline-divide-min-latency and
-minline-divide-max-throughput.
(PREDICATE_CODES): Update.
* config/ia64/ia64.md (extendsidi2): Remove * from f case.
(zero_extendsidi2): Likewise. Fix typo in f case insn.
(extendsfdf2): Add cases for gr<->fr and fr<->mem.
(extendsftf2): Likewise.
(extenddftf2): Likewise.
(fix_trunctfdi2_alts): New.
(fixuns_trunctfdi2_alts): New.
(madd*4): Rename from madd*3.
(divsi3, modsi3, udivsi3, umodsi3): New.
(divsi3_internal): New.
(divdi3, moddi3, udivdi3, umoddi3): New.
(divdi3_internal_lat, divdi3_internal_thr): New.
(multf3_alts, maddtf4_alts, nmaddtf4_alts): New.
(recip_approx): New.
Index: ia64-protos.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.20
diff -c -p -d -r1.20 ia64-protos.h
*** ia64-protos.h 2000/09/01 22:22:54 1.20
--- ia64-protos.h 2000/09/11 21:07:44
*************** extern int gr_register_operand PARAMS((r
*** 38,43 ****
--- 38,44 ----
extern int fr_register_operand PARAMS((rtx, enum machine_mode));
extern int grfr_register_operand PARAMS((rtx, enum machine_mode));
extern int gr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
+ extern int fr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
extern int grfr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
extern int gr_reg_or_0_operand PARAMS((rtx, enum machine_mode));
extern int gr_reg_or_5bit_operand PARAMS((rtx, enum machine_mode));
Index: ia64.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.c,v
retrieving revision 1.48
diff -c -p -d -r1.48 ia64.c
*** ia64.c 2000/09/01 22:22:54 1.48
--- ia64.c 2000/09/11 21:07:44
*************** gr_nonimmediate_operand (op, mode)
*** 397,402 ****
--- 397,422 ----
return 1;
}
+ /* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg. */
+
+ int
+ fr_nonimmediate_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+ {
+ if (! nonimmediate_operand (op, mode)) /* Must first pass the generic predicate. */
+ return 0;
+ if (GET_CODE (op) == SUBREG) /* Examine the expression inside a SUBREG. */
+ op = SUBREG_REG (op);
+ if (GET_CODE (op) == REG)
+ {
+ unsigned int regno = REGNO (op);
+ if (regno < FIRST_PSEUDO_REGISTER) /* Below the pseudo range: accept only FR regs. */
+ return FR_REGNO_P (regno);
+ }
+ return 1; /* Everything else that passed nonimmediate_operand is accepted. */
+ }
+
/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg. */
int
*************** ia64_override_options ()
*** 3484,3489 ****
--- 3504,3515 ----
if (TARGET_AUTO_PIC)
target_flags |= MASK_CONST_GP;
+ if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
+ {
+ warning ("cannot optimize division for both latency and throughput");
+ target_flags &= ~MASK_INLINE_DIV_THR;
+ }
+
if (ia64_fixed_range_string)
fix_range (ia64_fixed_range_string);
*************** rtx_needs_barrier (x, flags, pred)
*** 3970,3975 ****
--- 3996,4006 ----
case 20: /* mov = ar.bsp */
case 21: /* flushrs */
break;
+
+ case 5: /* recip_approx */
+ need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
+ need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
+ break;
case 13: /* cmpxchg_acq */
need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
Index: ia64.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.h,v
retrieving revision 1.43
diff -c -p -d -r1.43 ia64.h
*** ia64.h 2000/09/08 01:35:53 1.43
--- ia64.h 2000/09/11 21:07:44
*************** extern int target_flags;
*** 63,68 ****
--- 63,72 ----
#define MASK_AUTO_PIC 0x00000200 /* generate automatically PIC */
+ #define MASK_INLINE_DIV_LAT 0x00000400 /* inline div, min latency. */
+
+ #define MASK_INLINE_DIV_THR 0x00000800 /* inline div, max throughput. */
+
#define MASK_DWARF2_ASM 0x40000000 /* test dwarf2 line info via gas. */
#define TARGET_BIG_ENDIAN (target_flags & MASK_BIG_ENDIAN)
*************** extern int target_flags;
*** 85,90 ****
--- 89,101 ----
#define TARGET_AUTO_PIC (target_flags & MASK_AUTO_PIC)
+ #define TARGET_INLINE_DIV_LAT (target_flags & MASK_INLINE_DIV_LAT)
+
+ #define TARGET_INLINE_DIV_THR (target_flags & MASK_INLINE_DIV_THR)
+
+ #define TARGET_INLINE_DIV \
+ (target_flags & (MASK_INLINE_DIV_LAT | MASK_INLINE_DIV_THR))
+
#define TARGET_DWARF2_ASM (target_flags & MASK_DWARF2_ASM)
/* This macro defines names of command options to set and clear bits in
*************** extern int target_flags;
*** 123,128 ****
--- 134,143 ----
N_("gp is constant (but save/restore gp on indirect calls)") }, \
{ "auto-pic", MASK_AUTO_PIC, \
N_("Generate self-relocatable code") }, \
+ { "inline-divide-min-latency", MASK_INLINE_DIV_LAT, \
+ N_("Generate inline division, optimize for latency") }, \
+ { "inline-divide-max-throughput", MASK_INLINE_DIV_THR, \
+ N_("Generate inline division, optimize for throughput") }, \
{ "dwarf2-asm", MASK_DWARF2_ASM, \
N_("Enable Dwarf 2 line debug info via GNU as")}, \
{ "no-dwarf2-asm", -MASK_DWARF2_ASM, \
*************** do { \
*** 2646,2651 ****
--- 2661,2667 ----
{ "fr_register_operand", {SUBREG, REG}}, \
{ "grfr_register_operand", {SUBREG, REG}}, \
{ "gr_nonimmediate_operand", {SUBREG, REG, MEM}}, \
+ { "fr_nonimmediate_operand", {SUBREG, REG, MEM}}, \
{ "grfr_nonimmediate_operand", {SUBREG, REG, MEM}}, \
{ "gr_reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \
{ "gr_reg_or_5bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \
Index: ia64.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.md,v
retrieving revision 1.45
diff -c -p -d -r1.45 ia64.md
*** ia64.md 2000/09/06 01:46:04 1.45
--- ia64.md 2000/09/11 21:07:44
***************
*** 57,62 ****
--- 57,63 ----
;; 2 gr_restore
;; 3 fr_spill
;; 4 fr_restore
+ ;; 5 recip_approx
;; 8 popcnt
;; 12 mf
;; 13 cmpxchg_acq
***************
*** 950,957 ****
[(set_attr "type" "I")])
(define_insn "extendsidi2"
! [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
! (sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,*f")))]
""
"@
sxt4 %0 = %1
--- 951,958 ----
[(set_attr "type" "I")])
(define_insn "extendsidi2"
! [(set (match_operand:DI 0 "grfr_register_operand" "=r,?f")
! (sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,f")))]
""
"@
sxt4 %0 = %1
***************
*** 979,992 ****
[(set_attr "type" "I,M")])
(define_insn "zero_extendsidi2"
! [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,*f")
(zero_extend:DI
! (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,*f")))]
""
"@
zxt4 %0 = %1
ld4%O1 %0 = %1%P1
! fsxt.r %0 = f1, %1%B0"
[(set_attr "type" "I,M,F")])
;; Convert between floating point types of different sizes.
--- 980,993 ----
[(set_attr "type" "I,M")])
(define_insn "zero_extendsidi2"
! [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,?f")
(zero_extend:DI
! (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,f")))]
""
"@
zxt4 %0 = %1
ld4%O1 %0 = %1%P1
! fmix.r %0 = f0, %1%B0"
[(set_attr "type" "I,M,F")])
;; Convert between floating point types of different sizes.
***************
*** 997,1030 ****
;; would let combine merge the thing into adjacent insns.
(define_insn_and_split "extendsfdf2"
! [(set (match_operand:DF 0 "fr_register_operand" "=f,f")
! (float_extend:DF (match_operand:SF 1 "fr_register_operand" "0,f")))]
""
! "mov %0 = %1"
"reload_completed"
[(set (match_dup 0) (float_extend:DF (match_dup 1)))]
"if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
! [(set_attr "type" "F")])
(define_insn_and_split "extendsftf2"
! [(set (match_operand:TF 0 "fr_register_operand" "=f,f")
! (float_extend:TF (match_operand:SF 1 "fr_register_operand" "0,f")))]
""
! "mov %0 = %1"
"reload_completed"
[(set (match_dup 0) (float_extend:TF (match_dup 1)))]
"if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
! [(set_attr "type" "F")])
(define_insn_and_split "extenddftf2"
! [(set (match_operand:TF 0 "fr_register_operand" "=f,f")
! (float_extend:TF (match_operand:DF 1 "fr_register_operand" "0,f")))]
""
! "mov %0 = %1"
"reload_completed"
[(set (match_dup 0) (float_extend:TF (match_dup 1)))]
"if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
! [(set_attr "type" "F")])
(define_insn "truncdfsf2"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
--- 998,1050 ----
;; would let combine merge the thing into adjacent insns.
(define_insn_and_split "extendsfdf2"
! [(set (match_operand:DF 0 "grfr_nonimmediate_operand" "=f,f,f,f,m,*r")
! (float_extend:DF
! (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,m,*r,f,f")))]
""
! "@
! mov %0 = %1
! mov %0 = %1
! ldfs %0 = %1%P1
! setf.s %0 = %1
! stfd %0 = %1%P0
! getf.d %0 = %1"
"reload_completed"
[(set (match_dup 0) (float_extend:DF (match_dup 1)))]
"if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
! [(set_attr "type" "F,F,M,M,M,M")])
(define_insn_and_split "extendsftf2"
! [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q")
! (float_extend:TF
! (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))]
""
! "@
! mov %0 = %1
! mov %0 = %1
! ldfs %0 = %1%P1
! setf.s %0 = %1
! stfe %0 = %1%P0"
"reload_completed"
[(set (match_dup 0) (float_extend:TF (match_dup 1)))]
"if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
! [(set_attr "type" "F,F,M,M,M")])
(define_insn_and_split "extenddftf2"
! [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q")
! (float_extend:TF
! (match_operand:DF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))]
""
! "@
! mov %0 = %1
! mov %0 = %1
! ldfd %0 = %1%P1
! setf.d %0 = %1
! stfe %0 = %1%P0"
"reload_completed"
[(set (match_dup 0) (float_extend:TF (match_dup 1)))]
"if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
! [(set_attr "type" "F,F,M,M,M")])
(define_insn "truncdfsf2"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
***************
*** 1077,1082 ****
--- 1097,1110 ----
"fcvt.fx.trunc %0 = %1%B0"
[(set_attr "type" "F")])
+ (define_insn "fix_trunctfdi2_alts"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
+ (use (match_operand:SI 2 "const_int_operand" ""))]
+ ""
+ "fcvt.fx.trunc.s%2 %0 = %1%B0"
+ [(set_attr "type" "F")])
+
;; Convert between unsigned integer types and floating point.
(define_insn "floatunsdisf2"
***************
*** 1120,1125 ****
--- 1148,1161 ----
""
"fcvt.fxu.trunc %0 = %1%B0"
[(set_attr "type" "F")])
+
+ (define_insn "fixuns_trunctfdi2_alts"
+ [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ (unsigned_fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
+ (use (match_operand:SI 2 "const_int_operand" ""))]
+ ""
+ "fcvt.fxu.trunc.s%2 %0 = %1%B0"
+ [(set_attr "type" "F")])
;; ::::::::::::::::::::
;; ::
***************
*** 1400,1406 ****
"xma.l %0 = %1, %2, f0%B0"
[(set_attr "type" "F")])
! (define_insn "*maddsi3"
[(set (match_operand:SI 0 "fr_register_operand" "=f")
(plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f")
(match_operand:SI 2 "grfr_register_operand" "f"))
--- 1436,1442 ----
"xma.l %0 = %1, %2, f0%B0"
[(set_attr "type" "F")])
! (define_insn "maddsi4"
[(set (match_operand:SI 0 "fr_register_operand" "=f")
(plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f")
(match_operand:SI 2 "grfr_register_operand" "f"))
***************
*** 1481,1486 ****
--- 1517,1688 ----
operands[3] = gen_reg_rtx (CCmode);
}")
+ (define_expand "divsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ {
+ rtx op1_tf, op2_tf, op0_tf, op0_di, twon34;
+
+ op0_tf = gen_reg_rtx (TFmode);
+ op0_di = gen_reg_rtx (DImode);
+
+ if (CONSTANT_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ op1_tf = gen_reg_rtx (TFmode);
+ expand_float (op1_tf, operands[1], 0);
+
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+ op2_tf = gen_reg_rtx (TFmode);
+ expand_float (op2_tf, operands[2], 0);
+
+ /* 2^-34 */
+ #if 0
+ twon34 = (CONST_DOUBLE_FROM_REAL_VALUE
+ (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode));
+ twon34 = force_reg (TFmode, twon34);
+ #else
+ twon34 = gen_reg_rtx (TFmode);
+ convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0);
+ #endif
+
+ emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34));
+
+ emit_insn (gen_fix_trunctfdi2_alts (op0_di, op0_tf, const1_rtx));
+ emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+ DONE;
+ }")
+
+ (define_expand "modsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mod:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ { /* Signed SImode remainder, built as op1 + (op1 / op2) * -op2. */
+ rtx op2_neg, op1_di, div;
+
+ div = gen_reg_rtx (SImode); /* Receives the quotient. */
+ emit_insn (gen_divsi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); /* -op2 */
+
+ /* This is a trick to get us to reuse the value that we're sure to
+ have already copied to the FP regs. */
+ op1_di = gen_reg_rtx (DImode);
+ convert_move (op1_di, operands[1], 0); /* Last argument 0: signed extension. */
+
+ emit_insn (gen_maddsi4 (operands[0], div, op2_neg, /* op0 = div * -op2 + op1 */
+ gen_lowpart (SImode, op1_di)));
+ DONE;
+ }")
+
+ (define_expand "udivsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ {
+ rtx op1_tf, op2_tf, op0_tf, op0_di, twon34;
+
+ op0_tf = gen_reg_rtx (TFmode);
+ op0_di = gen_reg_rtx (DImode);
+
+ if (CONSTANT_P (operands[1]))
+ operands[1] = force_reg (SImode, operands[1]);
+ op1_tf = gen_reg_rtx (TFmode);
+ expand_float (op1_tf, operands[1], 1);
+
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+ op2_tf = gen_reg_rtx (TFmode);
+ expand_float (op2_tf, operands[2], 1);
+
+ /* 2^-34 */
+ #if 0
+ twon34 = (CONST_DOUBLE_FROM_REAL_VALUE
+ (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode));
+ twon34 = force_reg (TFmode, twon34);
+ #else
+ twon34 = gen_reg_rtx (TFmode);
+ convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0);
+ #endif
+
+ emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34));
+
+ emit_insn (gen_fixuns_trunctfdi2_alts (op0_di, op0_tf, const1_rtx));
+ emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+ DONE;
+ }")
+
+ (define_expand "umodsi3"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (umod:SI (match_operand:SI 1 "general_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ { /* Unsigned SImode remainder, built as op1 + (op1 / op2) * -op2. */
+ rtx op2_neg, op1_di, div;
+
+ div = gen_reg_rtx (SImode); /* Receives the unsigned quotient. */
+ emit_insn (gen_udivsi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0); /* -op2 */
+
+ /* This is a trick to get us to reuse the value that we're sure to
+ have already copied to the FP regs. */
+ op1_di = gen_reg_rtx (DImode);
+ convert_move (op1_di, operands[1], 1); /* Last argument 1: unsigned extension. */
+
+ emit_insn (gen_maddsi4 (operands[0], div, op2_neg, /* op0 = div * -op2 + op1 */
+ gen_lowpart (SImode, op1_di)));
+ DONE;
+ }")
+
+ (define_insn_and_split "divsi3_internal"
+ [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+ (float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+ (match_operand:TF 2 "fr_register_operand" "f"))))
+ (clobber (match_scratch:TF 4 "=&f"))
+ (clobber (match_scratch:TF 5 "=&f"))
+ (clobber (match_scratch:CC 6 "=c"))
+ (use (match_operand:TF 3 "fr_register_operand" "f"))]
+ "TARGET_INLINE_DIV"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+ (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+ (use (const_int 1))])
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 5)
+ (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+ (match_dup 7)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 4)
+ (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+ (match_dup 4)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 5)
+ (plus:TF (mult:TF (match_dup 5) (match_dup 5))
+ (match_dup 3)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+ (match_dup 4)))
+ (use (const_int 1))]))
+ ]
+ "operands[7] = CONST1_RTX (TFmode);"
+ [(set_attr "predicable" "no")])
;; ::::::::::::::::::::
;; ::
***************
*** 1557,1563 ****
;; ??? Maybe we should change how adds are canonicalized.
! (define_insn "*madddi3"
[(set (match_operand:DI 0 "fr_register_operand" "=f")
(plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
(match_operand:DI 2 "grfr_register_operand" "f"))
--- 1759,1765 ----
;; ??? Maybe we should change how adds are canonicalized.
! (define_insn "madddi4"
[(set (match_operand:DI 0 "fr_register_operand" "=f")
(plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
(match_operand:DI 2 "grfr_register_operand" "f"))
***************
*** 1572,1581 ****
;; We have to use nonmemory_operand for operand 4, to ensure that the
;; validate_changes call inside eliminate_regs will always succeed. If it
! ;; doesn't succeed, then this remain a madddi3 pattern, and will be reloaded
;; incorrectly.
! (define_insn "*madddi3_elim"
[(set (match_operand:DI 0 "register_operand" "=&r")
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f")
(match_operand:DI 2 "register_operand" "f"))
--- 1774,1783 ----
;; We have to use nonmemory_operand for operand 4, to ensure that the
;; validate_changes call inside eliminate_regs will always succeed. If it
! ;; doesn't succeed, then this remains a madddi4 pattern, and will be reloaded
;; incorrectly.
! (define_insn "*madddi4_elim"
[(set (match_operand:DI 0 "register_operand" "=&r")
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f")
(match_operand:DI 2 "register_operand" "f"))
***************
*** 1734,1739 ****
--- 1936,2143 ----
"popcnt %0 = %1"
[(set_attr "type" "I")])
+ (define_expand "divdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (div:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ { /* Signed DImode division: convert to TFmode, divide inline, truncate back. */
+ rtx op1_tf, op2_tf, op0_tf;
+
+ op0_tf = gen_reg_rtx (TFmode); /* Quotient in TFmode, before truncation. */
+
+ if (CONSTANT_P (operands[1])) /* Constants go into a register first. */
+ operands[1] = force_reg (DImode, operands[1]);
+ op1_tf = gen_reg_rtx (TFmode);
+ expand_float (op1_tf, operands[1], 0); /* Last argument 0: signed conversion. */
+
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (DImode, operands[2]);
+ op2_tf = gen_reg_rtx (TFmode);
+ expand_float (op2_tf, operands[2], 0);
+
+ if (TARGET_INLINE_DIV_LAT) /* Pick the sequence matching the command-line switch. */
+ emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
+ else
+ emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));
+
+ emit_insn (gen_fix_trunctfdi2_alts (operands[0], op0_tf, const1_rtx)); /* const1_rtx yields the .s1 suffix. */
+ DONE;
+ }")
+
+ (define_expand "moddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mod:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ { /* Signed DImode remainder, built as op1 + (op1 / op2) * -op2. */
+ rtx op2_neg, div;
+
+ div = gen_reg_rtx (DImode); /* Receives the quotient. */
+ emit_insn (gen_divdi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0); /* -op2 */
+
+ emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1])); /* op0 = div * -op2 + op1 */
+ DONE;
+ }")
+
+ (define_expand "udivdi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (udiv:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ { /* Unsigned DImode division: convert to TFmode, divide inline, truncate back. */
+ rtx op1_tf, op2_tf, op0_tf;
+
+ op0_tf = gen_reg_rtx (TFmode); /* Quotient in TFmode, before truncation. */
+
+ if (CONSTANT_P (operands[1])) /* Constants go into a register first. */
+ operands[1] = force_reg (DImode, operands[1]);
+ op1_tf = gen_reg_rtx (TFmode);
+ expand_float (op1_tf, operands[1], 1); /* Last argument 1: unsigned conversion. */
+
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (DImode, operands[2]);
+ op2_tf = gen_reg_rtx (TFmode);
+ expand_float (op2_tf, operands[2], 1);
+
+ if (TARGET_INLINE_DIV_LAT) /* Pick the sequence matching the command-line switch. */
+ emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
+ else
+ emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));
+
+ emit_insn (gen_fixuns_trunctfdi2_alts (operands[0], op0_tf, const1_rtx)); /* const1_rtx yields the .s1 suffix. */
+ DONE;
+ }")
+
+ (define_expand "umoddi3"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (umod:DI (match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")))]
+ "TARGET_INLINE_DIV"
+ "
+ { /* Unsigned DImode remainder, built as op1 + (op1 / op2) * -op2. */
+ rtx op2_neg, div;
+
+ div = gen_reg_rtx (DImode); /* Receives the unsigned quotient. */
+ emit_insn (gen_udivdi3 (div, operands[1], operands[2]));
+
+ op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0); /* -op2 */
+
+ emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1])); /* op0 = div * -op2 + op1 */
+ DONE;
+ }")
+
+ (define_insn_and_split "divdi3_internal_lat"
+ [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+ (float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+ (match_operand:TF 2 "fr_register_operand" "f"))))
+ (clobber (match_scratch:TF 3 "=&f"))
+ (clobber (match_scratch:TF 4 "=&f"))
+ (clobber (match_scratch:TF 5 "=&f"))
+ (clobber (match_scratch:CC 6 "=c"))]
+ "TARGET_INLINE_DIV_LAT"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+ (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+ (use (const_int 1))])
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 3)
+ (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+ (match_dup 7)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 5) (mult:TF (match_dup 3) (match_dup 3)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 4)
+ (plus:TF (mult:TF (match_dup 3) (match_dup 4))
+ (match_dup 4)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+ (match_dup 0)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 3)
+ (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+ (match_dup 4)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (plus:TF (mult:TF (match_dup 5) (match_dup 0))
+ (match_dup 0)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 4)
+ (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
+ (match_dup 1)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 6) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (plus:TF (mult:TF (match_dup 4) (match_dup 0))
+ (match_dup 3)))
+ (use (const_int 1))]))
+ ]
+ "operands[7] = CONST1_RTX (TFmode);"
+ [(set_attr "predicable" "no")])
+
+ (define_insn_and_split "divdi3_internal_thr"
+ [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+ (float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+ (match_operand:TF 2 "fr_register_operand" "f"))))
+ (clobber (match_scratch:TF 3 "=&f"))
+ (clobber (match_scratch:TF 4 "=f"))
+ (clobber (match_scratch:CC 5 "=c"))]
+ "TARGET_INLINE_DIV_THR"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+ (set (match_dup 5) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+ (use (const_int 1))])
+ (cond_exec (ne (match_dup 5) (const_int 0))
+ (parallel [(set (match_dup 3)
+ (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+ (match_dup 6)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 5) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+ (match_dup 0)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 5) (const_int 0))
+ (parallel [(set (match_dup 3) (mult:TF (match_dup 3) (match_dup 3)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 5) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+ (match_dup 0)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 5) (const_int 0))
+ (parallel [(set (match_dup 3) (mult:TF (match_dup 0) (match_dup 1)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 5) (const_int 0))
+ (parallel [(set (match_dup 4)
+ (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
+ (match_dup 1)))
+ (use (const_int 1))]))
+ (cond_exec (ne (match_dup 5) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (plus:TF (mult:TF (match_dup 4) (match_dup 0))
+ (match_dup 3)))
+ (use (const_int 1))]))
+ ]
+ "operands[6] = CONST1_RTX (TFmode);"
+ [(set_attr "predicable" "no")])
;; ::::::::::::::::::::
;; ::
***************
*** 1802,1808 ****
"fmax %0 = %1, %F2%B0"
[(set_attr "type" "F")])
! (define_insn "*maddsf3"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
(match_operand:SF 2 "fr_register_operand" "f"))
--- 2206,2212 ----
"fmax %0 = %1, %F2%B0"
[(set_attr "type" "F")])
! (define_insn "*maddsf4"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
(match_operand:SF 2 "fr_register_operand" "f"))
***************
*** 1811,1817 ****
"fma.s %0 = %1, %2, %F3%B0"
[(set_attr "type" "F")])
! (define_insn "*msubsf3"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
(minus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
(match_operand:SF 2 "fr_register_operand" "f"))
--- 2215,2221 ----
"fma.s %0 = %1, %2, %F3%B0"
[(set_attr "type" "F")])
! (define_insn "*msubsf4"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
(minus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
(match_operand:SF 2 "fr_register_operand" "f"))
***************
*** 1830,1836 ****
;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
! (define_insn "*nmaddsf3"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
(plus:SF (neg:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
(match_operand:SF 2 "fr_register_operand" "f")))
--- 2234,2240 ----
;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
! (define_insn "*nmaddsf4"
[(set (match_operand:SF 0 "fr_register_operand" "=f")
(plus:SF (neg:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
(match_operand:SF 2 "fr_register_operand" "f")))
***************
*** 1907,1913 ****
"fmax %0 = %1, %F2%B0"
[(set_attr "type" "F")])
! (define_insn "*madddf3"
[(set (match_operand:DF 0 "fr_register_operand" "=f")
(plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
(match_operand:DF 2 "fr_register_operand" "f"))
--- 2311,2317 ----
"fmax %0 = %1, %F2%B0"
[(set_attr "type" "F")])
! (define_insn "*madddf4"
[(set (match_operand:DF 0 "fr_register_operand" "=f")
(plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
(match_operand:DF 2 "fr_register_operand" "f"))
***************
*** 1916,1922 ****
"fma.d %0 = %1, %2, %F3%B0"
[(set_attr "type" "F")])
! (define_insn "*msubdf3"
[(set (match_operand:DF 0 "fr_register_operand" "=f")
(minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
(match_operand:DF 2 "fr_register_operand" "f"))
--- 2320,2326 ----
"fma.d %0 = %1, %2, %F3%B0"
[(set_attr "type" "F")])
! (define_insn "*msubdf4"
[(set (match_operand:DF 0 "fr_register_operand" "=f")
(minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
(match_operand:DF 2 "fr_register_operand" "f"))
***************
*** 1935,1941 ****
;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
! (define_insn "*nmadddf3"
[(set (match_operand:DF 0 "fr_register_operand" "=f")
(plus:DF (neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
(match_operand:DF 2 "fr_register_operand" "f")))
--- 2339,2345 ----
;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
! (define_insn "*nmadddf4"
[(set (match_operand:DF 0 "fr_register_operand" "=f")
(plus:DF (neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
(match_operand:DF 2 "fr_register_operand" "f")))
***************
*** 1974,1979 ****
--- 2378,2392 ----
"fmpy %0 = %F1, %F2%B0"
[(set_attr "type" "F")])
+ (define_insn "*multf3_alts"
+ [(set (match_operand:TF 0 "fr_register_operand" "=f")
+ (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+ (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))
+ (use (match_operand:SI 3 "const_int_operand" ""))]
+ ""
+ "fmpy.s%3 %0 = %F1, %F2%B0"
+ [(set_attr "type" "F")])
+
(define_insn "abstf2"
[(set (match_operand:TF 0 "fr_register_operand" "=f")
(abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))]
***************
*** 2011,2017 ****
"fmax %0 = %F1, %F2%B0"
[(set_attr "type" "F")])
! (define_insn "*maddtf3"
[(set (match_operand:TF 0 "fr_register_operand" "=f")
(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
(match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
--- 2424,2430 ----
"fmax %0 = %F1, %F2%B0"
[(set_attr "type" "F")])
! (define_insn "*maddtf4"
[(set (match_operand:TF 0 "fr_register_operand" "=f")
(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
(match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
***************
*** 2020,2027 ****
"fma %0 = %F1, %F2, %F3%B0"
[(set_attr "type" "F")])
! (define_insn "*msubtf3"
[(set (match_operand:TF 0 "fr_register_operand" "=f")
(minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
(match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
(match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))]
--- 2433,2450 ----
"fma %0 = %F1, %F2, %F3%B0"
[(set_attr "type" "F")])
! (define_insn "*maddtf4_alts"
[(set (match_operand:TF 0 "fr_register_operand" "=f")
+ (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+ (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
+ (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))
+ (use (match_operand:SI 4 "const_int_operand" ""))]
+ ""
+ "fma.s%4 %0 = %F1, %F2, %F3%B0"
+ [(set_attr "type" "F")])
+
+ (define_insn "*msubtf4"
+ [(set (match_operand:TF 0 "fr_register_operand" "=f")
(minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
(match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
(match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))]
***************
*** 2039,2045 ****
;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
! (define_insn "*nmaddtf3"
[(set (match_operand:TF 0 "fr_register_operand" "=f")
(plus:TF (neg:TF (mult:TF
(match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
--- 2462,2468 ----
;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
! (define_insn "*nmaddtf4"
[(set (match_operand:TF 0 "fr_register_operand" "=f")
(plus:TF (neg:TF (mult:TF
(match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
***************
*** 2047,2052 ****
--- 2470,2498 ----
(match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))]
""
"fnma %0 = %F1, %F2, %F3%B0"
+ [(set_attr "type" "F")])
+
+ (define_insn "*nmaddtf4_alts"
+ [(set (match_operand:TF 0 "fr_register_operand" "=f")
+ (plus:TF (neg:TF (mult:TF
+ (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+ (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))
+ (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))
+ (use (match_operand:SI 4 "const_int_operand" ""))]
+ ""
+ "fnma.s%4 %0 = %F1, %F2, %F3%B0"
+ [(set_attr "type" "F")])
+
+ (define_insn "*recip_approx"
+ [(set (match_operand:TF 0 "fr_register_operand" "=f")
+ (div:TF (const_int 1)
+ (match_operand:TF 3 "fr_register_operand" "f")))
+ (set (match_operand:CC 1 "register_operand" "=c")
+ (unspec:CC [(match_operand:TF 2 "fr_register_operand" "f")
+ (match_dup 3)] 5))
+ (use (match_operand:SI 4 "const_int_operand" ""))]
+ ""
+ "frcpa.s%4 %0, %1 = %2, %3"
[(set_attr "type" "F")])
;; ::::::::::::::::::::