This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
ia64 inline integer division

To: gcc-patches at gcc dot gnu dot org
Subject: ia64 inline integer division
From: Richard Henderson <rth at cygnus dot com>
Date: Mon, 11 Sep 2000 14:18:55 -0700
This is an experiment to see how much difference can be had
performance-wise if we inline division instead of deferring
to library routines.

There are two switches,

  -minline-divide-min-latency
  -minline-divide-max-throughput

which choose different implementations.  I have a feeling
that for most code this will just bloat things, so the 
default is still library routines.


r~


        * config/ia64/ia64-protos.h (fr_nonimmediate_operand): Declare.
        * config/ia64/ia64.c (fr_nonimmediate_operand): New.
        (ia64_override_options): Prevent optimizing division for both
        latency and throughput.
        (rtx_needs_barrier): Handle frcpa.
        * config/ia64/ia64.h (MASK_INLINE_DIV_LAT): New.
        (MASK_INLINE_DIV_THR, TARGET_INLINE_DIV_LAT): New.
        (TARGET_INLINE_DIV_THR, TARGET_INLINE_DIV): New.
        (TARGET_SWITCHES): Add -minline-divide-min-latency and
        -minline-divide-max-throughput.
        (PREDICATE_CODES): Update.
        * config/ia64/ia64.md (extendsidi2): Remove * from f case.
        (zero_extendsidi2): Likewise.  Fix typo in f case insn.
        (extendsfdf2): Add cases for gr<->fr and fr<->mem.
        (extendsftf2): Likewise.
        (extenddftf2): Likewise.
        (fix_trunctfdi2_alts): New.
        (fixuns_trunctfdi2_alts): New.
        (madd*4, msub*4, nmadd*4): Rename from madd*3, msub*3, nmadd*3.
        (divsi3, modsi3, udivsi3, umodsi3): New.
        (divsi3_internal): New.
        (divdi3, moddi3, udivdi3, umoddi3): New.
        (divdi3_internal_lat, divdi3_internal_thr): New.
        (multf3_alts, maddtf4_alts, nmaddtf4_alts): New.
        (recip_approx): New.

Index: ia64-protos.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.20
diff -c -p -d -r1.20 ia64-protos.h
*** ia64-protos.h	2000/09/01 22:22:54	1.20
--- ia64-protos.h	2000/09/11 21:07:44
*************** extern int gr_register_operand PARAMS((r
*** 38,43 ****
--- 38,44 ----
  extern int fr_register_operand PARAMS((rtx, enum machine_mode));
  extern int grfr_register_operand PARAMS((rtx, enum machine_mode));
  extern int gr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
+ extern int fr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
  extern int grfr_nonimmediate_operand PARAMS((rtx, enum machine_mode));
  extern int gr_reg_or_0_operand PARAMS((rtx, enum machine_mode));
  extern int gr_reg_or_5bit_operand PARAMS((rtx, enum machine_mode));
Index: ia64.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.c,v
retrieving revision 1.48
diff -c -p -d -r1.48 ia64.c
*** ia64.c	2000/09/01 22:22:54	1.48
--- ia64.c	2000/09/11 21:07:44
*************** gr_nonimmediate_operand (op, mode)
*** 397,402 ****
--- 397,422 ----
    return 1;
  }
  
+ /* Nonzero if OP is a nonimmediate operand that is (or could be) an FR reg.  */
+ 
+ int
+ fr_nonimmediate_operand (op, mode)
+      rtx op;
+      enum machine_mode mode;
+ {
+   rtx inner = op;
+ 
+   if (! nonimmediate_operand (op, mode))
+     return 0;
+   if (GET_CODE (inner) == SUBREG)
+     inner = SUBREG_REG (inner);
+ 
+   /* Anything that is not a hard register passes; a hard one must be FR.  */
+   if (GET_CODE (inner) != REG || REGNO (inner) >= FIRST_PSEUDO_REGISTER)
+     return 1;
+   return FR_REGNO_P (REGNO (inner));
+ }
+ 
  /* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
  
  int
*************** ia64_override_options ()
*** 3484,3489 ****
--- 3504,3515 ----
    if (TARGET_AUTO_PIC)
      target_flags |= MASK_CONST_GP;
  
+   if (TARGET_INLINE_DIV_LAT && TARGET_INLINE_DIV_THR)
+     {
+       warning ("cannot optimize division for both latency and throughput");
+       target_flags &= ~MASK_INLINE_DIV_THR;
+     }
+ 
    if (ia64_fixed_range_string)
      fix_range (ia64_fixed_range_string);
  
*************** rtx_needs_barrier (x, flags, pred)
*** 3970,3975 ****
--- 3996,4006 ----
  	case 20: /* mov = ar.bsp */
  	case 21: /* flushrs */
            break;
+ 
+ 	case 5: /* recip_approx */
+ 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
+ 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
+ 	  break;
  
          case 13: /* cmpxchg_acq */
  	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
Index: ia64.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.h,v
retrieving revision 1.43
diff -c -p -d -r1.43 ia64.h
*** ia64.h	2000/09/08 01:35:53	1.43
--- ia64.h	2000/09/11 21:07:44
*************** extern int target_flags;
*** 63,68 ****
--- 63,72 ----
  
  #define MASK_AUTO_PIC	0x00000200	/* generate automatically PIC */
  
+ #define MASK_INLINE_DIV_LAT 0x00000400	/* inline div, min latency.  */
+ 
+ #define MASK_INLINE_DIV_THR 0x00000800	/* inline div, max throughput.  */
+ 
  #define MASK_DWARF2_ASM 0x40000000	/* test dwarf2 line info via gas.  */
  
  #define TARGET_BIG_ENDIAN	(target_flags & MASK_BIG_ENDIAN)
*************** extern int target_flags;
*** 85,90 ****
--- 89,101 ----
  
  #define TARGET_AUTO_PIC		(target_flags & MASK_AUTO_PIC)
  
+ #define TARGET_INLINE_DIV_LAT	(target_flags & MASK_INLINE_DIV_LAT)
+ 
+ #define TARGET_INLINE_DIV_THR	(target_flags & MASK_INLINE_DIV_THR)
+ 
+ #define TARGET_INLINE_DIV \
+   (target_flags & (MASK_INLINE_DIV_LAT | MASK_INLINE_DIV_THR))
+ 
  #define TARGET_DWARF2_ASM	(target_flags & MASK_DWARF2_ASM)
  
  /* This macro defines names of command options to set and clear bits in
*************** extern int target_flags;
*** 123,128 ****
--- 134,143 ----
        N_("gp is constant (but save/restore gp on indirect calls)") },	\
    { "auto-pic",		MASK_AUTO_PIC,					\
        N_("Generate self-relocatable code") },				\
+   { "inline-divide-min-latency", MASK_INLINE_DIV_LAT,			\
+       N_("Generate inline division, optimize for latency") },		\
+   { "inline-divide-max-throughput", MASK_INLINE_DIV_THR,		\
+       N_("Generate inline division, optimize for throughput") },	\
    { "dwarf2-asm", 	MASK_DWARF2_ASM,				\
        N_("Enable Dwarf 2 line debug info via GNU as")},			\
    { "no-dwarf2-asm", 	-MASK_DWARF2_ASM,				\
*************** do {									\
*** 2646,2651 ****
--- 2661,2667 ----
  { "fr_register_operand", {SUBREG, REG}},				\
  { "grfr_register_operand", {SUBREG, REG}},				\
  { "gr_nonimmediate_operand", {SUBREG, REG, MEM}},			\
+ { "fr_nonimmediate_operand", {SUBREG, REG, MEM}},			\
  { "grfr_nonimmediate_operand", {SUBREG, REG, MEM}},			\
  { "gr_reg_or_0_operand", {SUBREG, REG, CONST_INT}},			\
  { "gr_reg_or_5bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}},	\
Index: ia64.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.md,v
retrieving revision 1.45
diff -c -p -d -r1.45 ia64.md
*** ia64.md	2000/09/06 01:46:04	1.45
--- ia64.md	2000/09/11 21:07:44
***************
*** 57,62 ****
--- 57,63 ----
  ;;	2	gr_restore
  ;;	3	fr_spill
  ;;	4	fr_restore
+ ;;	5	recip_approx
  ;;	8	popcnt
  ;;	12	mf
  ;;	13	cmpxchg_acq
***************
*** 950,957 ****
    [(set_attr "type" "I")])
  
  (define_insn "extendsidi2"
!   [(set (match_operand:DI 0 "grfr_register_operand" "=r,*f")
! 	(sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,*f")))]
    ""
    "@
     sxt4 %0 = %1
--- 951,958 ----
    [(set_attr "type" "I")])
  
  (define_insn "extendsidi2"
!   [(set (match_operand:DI 0 "grfr_register_operand" "=r,?f")
! 	(sign_extend:DI (match_operand:SI 1 "grfr_register_operand" "r,f")))]
    ""
    "@
     sxt4 %0 = %1
***************
*** 979,992 ****
    [(set_attr "type" "I,M")])
  
  (define_insn "zero_extendsidi2"
!   [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,*f")
  	(zero_extend:DI
! 	  (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,*f")))]
    ""
    "@
     zxt4 %0 = %1
     ld4%O1 %0 = %1%P1
!    fsxt.r %0 = f1, %1%B0"
    [(set_attr "type" "I,M,F")])
  
  ;; Convert between floating point types of different sizes.
--- 980,993 ----
    [(set_attr "type" "I,M")])
  
  (define_insn "zero_extendsidi2"
!   [(set (match_operand:DI 0 "grfr_register_operand" "=r,r,?f")
  	(zero_extend:DI
! 	  (match_operand:SI 1 "grfr_nonimmediate_operand" "r,m,f")))]
    ""
    "@
     zxt4 %0 = %1
     ld4%O1 %0 = %1%P1
!    fmix.r %0 = f0, %1%B0"
    [(set_attr "type" "I,M,F")])
  
  ;; Convert between floating point types of different sizes.
***************
*** 997,1030 ****
  ;; would let combine merge the thing into adjacent insns.
  
  (define_insn_and_split "extendsfdf2"
!   [(set (match_operand:DF 0 "fr_register_operand" "=f,f")
! 	(float_extend:DF (match_operand:SF 1 "fr_register_operand" "0,f")))]
    ""
!   "mov %0 = %1"
    "reload_completed"
    [(set (match_dup 0) (float_extend:DF (match_dup 1)))]
    "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
!   [(set_attr "type" "F")])
  
  (define_insn_and_split "extendsftf2"
!   [(set (match_operand:TF 0 "fr_register_operand" "=f,f")
! 	(float_extend:TF (match_operand:SF 1 "fr_register_operand" "0,f")))]
    ""
!   "mov %0 = %1"
    "reload_completed"
    [(set (match_dup 0) (float_extend:TF (match_dup 1)))]
    "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
!   [(set_attr "type" "F")])
  
  (define_insn_and_split "extenddftf2"
!   [(set (match_operand:TF 0 "fr_register_operand" "=f,f")
! 	(float_extend:TF (match_operand:DF 1 "fr_register_operand" "0,f")))]
    ""
!   "mov %0 = %1"
    "reload_completed"
    [(set (match_dup 0) (float_extend:TF (match_dup 1)))]
    "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
!   [(set_attr "type" "F")])
  
  (define_insn "truncdfsf2"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
--- 998,1050 ----
  ;; would let combine merge the thing into adjacent insns.
  
  (define_insn_and_split "extendsfdf2"
!   [(set (match_operand:DF 0 "grfr_nonimmediate_operand" "=f,f,f,f,m,*r")
! 	(float_extend:DF
! 	  (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,m,*r,f,f")))]
    ""
!   "@
!    mov %0 = %1
!    mov %0 = %1
!    ldfs %0 = %1%P1
!    setf.s %0 = %1
!    stfd %0 = %1%P0
!    getf.d %0 = %1"
    "reload_completed"
    [(set (match_dup 0) (float_extend:DF (match_dup 1)))]
    "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
!   [(set_attr "type" "F,F,M,M,M,M")])
  
  (define_insn_and_split "extendsftf2"
!   [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q")
! 	(float_extend:TF
! 	  (match_operand:SF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))]
    ""
!   "@
!    mov %0 = %1
!    mov %0 = %1
!    ldfs %0 = %1%P1
!    setf.s %0 = %1
!    stfe %0 = %1%P0"
    "reload_completed"
    [(set (match_dup 0) (float_extend:TF (match_dup 1)))]
    "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
!   [(set_attr "type" "F,F,M,M,M")])
  
  (define_insn_and_split "extenddftf2"
!   [(set (match_operand:TF 0 "fr_nonimmediate_operand" "=f,f,f,f,Q")
! 	(float_extend:TF
! 	  (match_operand:DF 1 "grfr_nonimmediate_operand" "0,f,Q,*r,f")))]
    ""
!   "@
!    mov %0 = %1
!    mov %0 = %1
!    ldfd %0 = %1%P1
!    setf.d %0 = %1
!    stfe %0 = %1%P0"
    "reload_completed"
    [(set (match_dup 0) (float_extend:TF (match_dup 1)))]
    "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;"
!   [(set_attr "type" "F,F,M,M,M")])
  
  (define_insn "truncdfsf2"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
***************
*** 1077,1082 ****
--- 1097,1110 ----
    "fcvt.fx.trunc %0 = %1%B0"
    [(set_attr "type" "F")])
  
+ (define_insn "fix_trunctfdi2_alts"
+   [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ 	(fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
+    (use (match_operand:SI 2 "const_int_operand" ""))]
+   ""
+   "fcvt.fx.trunc.s%2 %0 = %1%B0"
+   [(set_attr "type" "F")])
+ 
  ;; Convert between unsigned integer types and floating point.
  
  (define_insn "floatunsdisf2"
***************
*** 1120,1125 ****
--- 1148,1161 ----
    ""
    "fcvt.fxu.trunc %0 = %1%B0"
    [(set_attr "type" "F")])
+ 
+ (define_insn "fixuns_trunctfdi2_alts"
+   [(set (match_operand:DI 0 "fr_register_operand" "=f")
+ 	(unsigned_fix:DI (match_operand:TF 1 "fr_register_operand" "f")))
+    (use (match_operand:SI 2 "const_int_operand" ""))]
+   ""
+   "fcvt.fxu.trunc.s%2 %0 = %1%B0"
+   [(set_attr "type" "F")])
  
  ;; ::::::::::::::::::::
  ;; ::
***************
*** 1400,1406 ****
    "xma.l %0 = %1, %2, f0%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*maddsi3"
    [(set (match_operand:SI 0 "fr_register_operand" "=f")
  	(plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f")
  			  (match_operand:SI 2 "grfr_register_operand" "f"))
--- 1436,1442 ----
    "xma.l %0 = %1, %2, f0%B0"
    [(set_attr "type" "F")])
  
! (define_insn "maddsi4"
    [(set (match_operand:SI 0 "fr_register_operand" "=f")
  	(plus:SI (mult:SI (match_operand:SI 1 "grfr_register_operand" "f")
  			  (match_operand:SI 2 "grfr_register_operand" "f"))
***************
*** 1481,1486 ****
--- 1517,1688 ----
    operands[3] = gen_reg_rtx (CCmode);
  }")
  
+ (define_expand "divsi3"
+   [(set (match_operand:SI 0 "register_operand" "")
+ 	(div:SI (match_operand:SI 1 "general_operand" "")
+ 		(match_operand:SI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op1_tf, op2_tf, op0_tf, op0_di, twon34;
+ 
+   op0_tf = gen_reg_rtx (TFmode);
+   op0_di = gen_reg_rtx (DImode);
+ 
+   if (CONSTANT_P (operands[1]))
+     operands[1] = force_reg (SImode, operands[1]);
+   op1_tf = gen_reg_rtx (TFmode);
+   expand_float (op1_tf, operands[1], 0);
+ 
+   if (CONSTANT_P (operands[2]))
+     operands[2] = force_reg (SImode, operands[2]);
+   op2_tf = gen_reg_rtx (TFmode);
+   expand_float (op2_tf, operands[2], 0);
+ 
+   /* 2^-34 */
+ #if 0
+   twon34 = (CONST_DOUBLE_FROM_REAL_VALUE
+ 	    (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode));
+   twon34 = force_reg (TFmode, twon34);
+ #else
+   twon34 = gen_reg_rtx (TFmode);
+   convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0);
+ #endif
+ 
+   emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34));
+ 
+   emit_insn (gen_fix_trunctfdi2_alts (op0_di, op0_tf, const1_rtx));
+   emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+   DONE;
+ }")
+ 
+ (define_expand "modsi3"
+   [(set (match_operand:SI 0 "register_operand" "")
+ 	(mod:SI (match_operand:SI 1 "general_operand" "")
+ 		(match_operand:SI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op2_neg, op1_di, div;
+ 
+   div = gen_reg_rtx (SImode);
+   emit_insn (gen_divsi3 (div, operands[1], operands[2]));
+ 
+   op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
+ 
+   /* This is a trick to get us to reuse the value that we're sure to
+      have already copied to the FP regs.  */
+   op1_di = gen_reg_rtx (DImode);
+   convert_move (op1_di, operands[1], 0);
+ 
+   emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
+ 			  gen_lowpart (SImode, op1_di)));
+   DONE;
+ }")
+ 
+ (define_expand "udivsi3"
+   [(set (match_operand:SI 0 "register_operand" "")
+ 	(udiv:SI (match_operand:SI 1 "general_operand" "")
+ 		 (match_operand:SI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op1_tf, op2_tf, op0_tf, op0_di, twon34;
+ 
+   op0_tf = gen_reg_rtx (TFmode);
+   op0_di = gen_reg_rtx (DImode);
+ 
+   if (CONSTANT_P (operands[1]))
+     operands[1] = force_reg (SImode, operands[1]);
+   op1_tf = gen_reg_rtx (TFmode);
+   expand_float (op1_tf, operands[1], 1);
+ 
+   if (CONSTANT_P (operands[2]))
+     operands[2] = force_reg (SImode, operands[2]);
+   op2_tf = gen_reg_rtx (TFmode);
+   expand_float (op2_tf, operands[2], 1);
+ 
+   /* 2^-34 */
+ #if 0
+   twon34 = (CONST_DOUBLE_FROM_REAL_VALUE
+ 	    (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), TFmode));
+   twon34 = force_reg (TFmode, twon34);
+ #else
+   twon34 = gen_reg_rtx (TFmode);
+   convert_move (twon34, force_const_mem (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (REAL_VALUE_FROM_TARGET_SINGLE (0x2e800000), SFmode)), 0);
+ #endif
+ 
+   emit_insn (gen_divsi3_internal (op0_tf, op1_tf, op2_tf, twon34));
+ 
+   emit_insn (gen_fixuns_trunctfdi2_alts (op0_di, op0_tf, const1_rtx));
+   emit_move_insn (operands[0], gen_lowpart (SImode, op0_di));
+   DONE;
+ }")
+ 
+ (define_expand "umodsi3"
+   [(set (match_operand:SI 0 "register_operand" "")
+ 	(umod:SI (match_operand:SI 1 "general_operand" "")
+ 		 (match_operand:SI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op2_neg, op1_di, div;
+ 
+   div = gen_reg_rtx (SImode);
+   emit_insn (gen_udivsi3 (div, operands[1], operands[2]));
+ 
+   op2_neg = expand_unop (SImode, neg_optab, operands[2], NULL_RTX, 0);
+ 
+   /* This is a trick to get us to reuse the value that we're sure to
+      have already copied to the FP regs.  */
+   op1_di = gen_reg_rtx (DImode);
+   convert_move (op1_di, operands[1], 1);
+ 
+   emit_insn (gen_maddsi4 (operands[0], div, op2_neg,
+ 			  gen_lowpart (SImode, op1_di)));
+   DONE;
+ }")
+ 
+ (define_insn_and_split "divsi3_internal"
+   [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+ 	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+ 			  (match_operand:TF 2 "fr_register_operand" "f"))))
+    (clobber (match_scratch:TF 4 "=&f"))
+    (clobber (match_scratch:TF 5 "=&f"))
+    (clobber (match_scratch:CC 6 "=c"))
+    (use (match_operand:TF 3 "fr_register_operand" "f"))]
+   "TARGET_INLINE_DIV"
+   "#"
+   "&& reload_completed"
+   [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+ 	      (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+ 	      (use (const_int 1))])
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 5)
+ 		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+ 			      (match_dup 7)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 4)
+ 		     (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+ 			      (match_dup 4)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 5)
+ 		     (plus:TF (mult:TF (match_dup 5) (match_dup 5))
+ 			      (match_dup 3)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 0)
+ 		     (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+ 			      (match_dup 4)))
+ 		(use (const_int 1))]))
+   ] 
+   "operands[7] = CONST1_RTX (TFmode);"
+   [(set_attr "predicable" "no")])
  
  ;; ::::::::::::::::::::
  ;; ::
***************
*** 1557,1563 ****
  
  ;; ??? Maybe we should change how adds are canonicalized.
  
! (define_insn "*madddi3"
    [(set (match_operand:DI 0 "fr_register_operand" "=f")
  	(plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
  			  (match_operand:DI 2 "grfr_register_operand" "f"))
--- 1759,1765 ----
  
  ;; ??? Maybe we should change how adds are canonicalized.
  
! (define_insn "madddi4"
    [(set (match_operand:DI 0 "fr_register_operand" "=f")
  	(plus:DI (mult:DI (match_operand:DI 1 "grfr_register_operand" "f")
  			  (match_operand:DI 2 "grfr_register_operand" "f"))
***************
*** 1572,1581 ****
  
  ;; We have to use nonmemory_operand for operand 4, to ensure that the
  ;; validate_changes call inside eliminate_regs will always succeed.  If it
! ;; doesn't succeed, then this remain a madddi3 pattern, and will be reloaded
  ;; incorrectly.
  
! (define_insn "*madddi3_elim"
    [(set (match_operand:DI 0 "register_operand" "=&r")
  	(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f")
  				   (match_operand:DI 2 "register_operand" "f"))
--- 1774,1783 ----
  
  ;; We have to use nonmemory_operand for operand 4, to ensure that the
  ;; validate_changes call inside eliminate_regs will always succeed.  If it
! ;; doesn't succeed, then this remains a madddi4 pattern, and will be reloaded
  ;; incorrectly.
  
! (define_insn "*madddi4_elim"
    [(set (match_operand:DI 0 "register_operand" "=&r")
  	(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "f")
  				   (match_operand:DI 2 "register_operand" "f"))
***************
*** 1734,1739 ****
--- 1936,2143 ----
    "popcnt %0 = %1"
    [(set_attr "type" "I")])
  
+ (define_expand "divdi3"
+   [(set (match_operand:DI 0 "register_operand" "")
+ 	(div:DI (match_operand:DI 1 "general_operand" "")
+ 		(match_operand:DI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op1_tf, op2_tf, op0_tf;
+ 
+   op0_tf = gen_reg_rtx (TFmode);
+ 
+   if (CONSTANT_P (operands[1]))
+     operands[1] = force_reg (DImode, operands[1]);
+   op1_tf = gen_reg_rtx (TFmode);
+   expand_float (op1_tf, operands[1], 0);
+ 
+   if (CONSTANT_P (operands[2]))
+     operands[2] = force_reg (DImode, operands[2]);
+   op2_tf = gen_reg_rtx (TFmode);
+   expand_float (op2_tf, operands[2], 0);
+ 
+   if (TARGET_INLINE_DIV_LAT)
+     emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
+   else
+     emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));
+ 
+   emit_insn (gen_fix_trunctfdi2_alts (operands[0], op0_tf, const1_rtx));
+   DONE;
+ }")
+ 
+ (define_expand "moddi3"
+   [(set (match_operand:DI 0 "register_operand" "")
+ 	(mod:DI (match_operand:DI 1 "general_operand" "")
+ 		(match_operand:DI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op2_neg, div;
+ 
+   /* Signed DImode remainder: get the quotient from the divdi3 expander.  */
+   div = gen_reg_rtx (DImode);
+   emit_insn (gen_divdi3 (div, operands[1], operands[2]));
+   /* Then op0 = div * -op2 + op1, emitted as a single madddi4 insn.  */
+   op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
+   emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
+   DONE;
+ }")
+ 
+ (define_expand "udivdi3"
+   [(set (match_operand:DI 0 "register_operand" "")
+ 	(udiv:DI (match_operand:DI 1 "general_operand" "")
+ 		 (match_operand:DI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op1_tf, op2_tf, op0_tf;
+ 
+   op0_tf = gen_reg_rtx (TFmode);
+ 
+   if (CONSTANT_P (operands[1]))
+     operands[1] = force_reg (DImode, operands[1]);
+   op1_tf = gen_reg_rtx (TFmode);
+   expand_float (op1_tf, operands[1], 1);
+ 
+   if (CONSTANT_P (operands[2]))
+     operands[2] = force_reg (DImode, operands[2]);
+   op2_tf = gen_reg_rtx (TFmode);
+   expand_float (op2_tf, operands[2], 1);
+ 
+   if (TARGET_INLINE_DIV_LAT)
+     emit_insn (gen_divdi3_internal_lat (op0_tf, op1_tf, op2_tf));
+   else
+     emit_insn (gen_divdi3_internal_thr (op0_tf, op1_tf, op2_tf));
+ 
+   emit_insn (gen_fixuns_trunctfdi2_alts (operands[0], op0_tf, const1_rtx));
+   DONE;
+ }")
+ 
+ (define_expand "umoddi3"
+   [(set (match_operand:DI 0 "register_operand" "")
+ 	(umod:DI (match_operand:DI 1 "general_operand" "")
+ 		 (match_operand:DI 2 "general_operand" "")))]
+   "TARGET_INLINE_DIV"
+   "
+ {
+   rtx op2_neg, div;
+ 
+   div = gen_reg_rtx (DImode);
+   emit_insn (gen_udivdi3 (div, operands[1], operands[2]));
+ 
+   op2_neg = expand_unop (DImode, neg_optab, operands[2], NULL_RTX, 0);
+ 
+   emit_insn (gen_madddi4 (operands[0], div, op2_neg, operands[1]));
+   DONE;
+ }")
+ 
+ (define_insn_and_split "divdi3_internal_lat"
+   [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+ 	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+ 			  (match_operand:TF 2 "fr_register_operand" "f"))))
+    (clobber (match_scratch:TF 3 "=&f"))
+    (clobber (match_scratch:TF 4 "=&f"))
+    (clobber (match_scratch:TF 5 "=&f"))
+    (clobber (match_scratch:CC 6 "=c"))]
+   "TARGET_INLINE_DIV_LAT"
+   "#"
+   "&& reload_completed"
+   [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+ 	      (set (match_dup 6) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+ 	      (use (const_int 1))])
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 3)
+ 		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+ 			      (match_dup 7)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 4) (mult:TF (match_dup 1) (match_dup 0)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 5) (mult:TF (match_dup 3) (match_dup 3)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 4)
+ 		     (plus:TF (mult:TF (match_dup 3) (match_dup 4))
+ 			      (match_dup 4)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 0)
+ 		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+ 			      (match_dup 0)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 3)
+ 		     (plus:TF (mult:TF (match_dup 5) (match_dup 4))
+ 			      (match_dup 4)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 0)
+ 		     (plus:TF (mult:TF (match_dup 5) (match_dup 0))
+ 			      (match_dup 0)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 4)
+ 		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
+ 			      (match_dup 1)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 6) (const_int 0))
+      (parallel [(set (match_dup 0)
+ 		     (plus:TF (mult:TF (match_dup 4) (match_dup 0))
+ 			      (match_dup 3)))
+ 		(use (const_int 1))]))
+   ] 
+   "operands[7] = CONST1_RTX (TFmode);"
+   [(set_attr "predicable" "no")])
+ 
+ (define_insn_and_split "divdi3_internal_thr"
+   [(set (match_operand:TF 0 "fr_register_operand" "=&f")
+ 	(float:TF (div:SI (match_operand:TF 1 "fr_register_operand" "f")
+ 			  (match_operand:TF 2 "fr_register_operand" "f"))))
+    (clobber (match_scratch:TF 3 "=&f"))
+    (clobber (match_scratch:TF 4 "=f"))
+    (clobber (match_scratch:CC 5 "=c"))]
+   "TARGET_INLINE_DIV_THR"
+   "#"
+   "&& reload_completed"
+   [(parallel [(set (match_dup 0) (div:TF (const_int 1) (match_dup 2)))
+ 	      (set (match_dup 5) (unspec:CC [(match_dup 1) (match_dup 2)] 5))
+ 	      (use (const_int 1))])
+    (cond_exec (ne (match_dup 5) (const_int 0))
+      (parallel [(set (match_dup 3)
+ 		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 0)))
+ 			      (match_dup 6)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 5) (const_int 0))
+      (parallel [(set (match_dup 0)
+ 		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+ 			      (match_dup 0)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 5) (const_int 0))
+      (parallel [(set (match_dup 3) (mult:TF (match_dup 3) (match_dup 3)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 5) (const_int 0))
+      (parallel [(set (match_dup 0)
+ 		     (plus:TF (mult:TF (match_dup 3) (match_dup 0))
+ 			      (match_dup 0)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 5) (const_int 0))
+      (parallel [(set (match_dup 3) (mult:TF (match_dup 0) (match_dup 1)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 5) (const_int 0))
+      (parallel [(set (match_dup 4)
+ 		     (plus:TF (neg:TF (mult:TF (match_dup 2) (match_dup 3)))
+ 			      (match_dup 1)))
+ 		(use (const_int 1))]))
+    (cond_exec (ne (match_dup 5) (const_int 0))
+      (parallel [(set (match_dup 0)
+ 		     (plus:TF (mult:TF (match_dup 4) (match_dup 0))
+ 			      (match_dup 3)))
+ 		(use (const_int 1))]))
+   ] 
+   "operands[6] = CONST1_RTX (TFmode);"
+   [(set_attr "predicable" "no")])
  
  ;; ::::::::::::::::::::
  ;; ::
***************
*** 1802,1808 ****
    "fmax %0 = %1, %F2%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*maddsf3"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
  	(plus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
  			  (match_operand:SF 2 "fr_register_operand" "f"))
--- 2206,2212 ----
    "fmax %0 = %1, %F2%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*maddsf4"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
  	(plus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
  			  (match_operand:SF 2 "fr_register_operand" "f"))
***************
*** 1811,1817 ****
    "fma.s %0 = %1, %2, %F3%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*msubsf3"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
  	(minus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
  			   (match_operand:SF 2 "fr_register_operand" "f"))
--- 2215,2221 ----
    "fma.s %0 = %1, %2, %F3%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*msubsf4"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
  	(minus:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
  			   (match_operand:SF 2 "fr_register_operand" "f"))
***************
*** 1830,1836 ****
  
  ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
  
! (define_insn "*nmaddsf3"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
  	(plus:SF (neg:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
  				  (match_operand:SF 2 "fr_register_operand" "f")))
--- 2234,2240 ----
  
  ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
  
! (define_insn "*nmaddsf4"
    [(set (match_operand:SF 0 "fr_register_operand" "=f")
  	(plus:SF (neg:SF (mult:SF (match_operand:SF 1 "fr_register_operand" "f")
  				  (match_operand:SF 2 "fr_register_operand" "f")))
***************
*** 1907,1913 ****
    "fmax %0 = %1, %F2%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*madddf3"
    [(set (match_operand:DF 0 "fr_register_operand" "=f")
  	(plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
  			  (match_operand:DF 2 "fr_register_operand" "f"))
--- 2311,2317 ----
    "fmax %0 = %1, %F2%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*madddf4"
    [(set (match_operand:DF 0 "fr_register_operand" "=f")
  	(plus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
  			  (match_operand:DF 2 "fr_register_operand" "f"))
***************
*** 1916,1922 ****
    "fma.d %0 = %1, %2, %F3%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*msubdf3"
    [(set (match_operand:DF 0 "fr_register_operand" "=f")
  	(minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
  			   (match_operand:DF 2 "fr_register_operand" "f"))
--- 2320,2326 ----
    "fma.d %0 = %1, %2, %F3%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*msubdf4"
    [(set (match_operand:DF 0 "fr_register_operand" "=f")
  	(minus:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
  			   (match_operand:DF 2 "fr_register_operand" "f"))
***************
*** 1935,1941 ****
  
  ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
  
! (define_insn "*nmadddf3"
    [(set (match_operand:DF 0 "fr_register_operand" "=f")
  	(plus:DF (neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
  				  (match_operand:DF 2 "fr_register_operand" "f")))
--- 2339,2345 ----
  
  ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
  
! (define_insn "*nmadddf4"
    [(set (match_operand:DF 0 "fr_register_operand" "=f")
  	(plus:DF (neg:DF (mult:DF (match_operand:DF 1 "fr_register_operand" "f")
  				  (match_operand:DF 2 "fr_register_operand" "f")))
***************
*** 1974,1979 ****
--- 2378,2392 ----
    "fmpy %0 = %F1, %F2%B0"
    [(set_attr "type" "F")])
  
+ (define_insn "*multf3_alts"
+   [(set (match_operand:TF 0 "fr_register_operand" "=f")
+ 	(mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+ 		 (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))
+    (use (match_operand:SI 3 "const_int_operand" ""))]
+   ""
+   "fmpy.s%3 %0 = %F1, %F2%B0"
+   [(set_attr "type" "F")])
+ 
  (define_insn "abstf2"
    [(set (match_operand:TF 0 "fr_register_operand" "=f")
  	(abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))]
***************
*** 2011,2017 ****
    "fmax %0 = %F1, %F2%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*maddtf3"
    [(set (match_operand:TF 0 "fr_register_operand" "=f")
  	(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
  			  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
--- 2424,2430 ----
    "fmax %0 = %F1, %F2%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*maddtf4"
    [(set (match_operand:TF 0 "fr_register_operand" "=f")
  	(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
  			  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
***************
*** 2020,2027 ****
    "fma %0 = %F1, %F2, %F3%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*msubtf3"
    [(set (match_operand:TF 0 "fr_register_operand" "=f")
  	(minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
  			   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
  		  (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))]
--- 2433,2450 ----
    "fma %0 = %F1, %F2, %F3%B0"
    [(set_attr "type" "F")])
  
! (define_insn "*maddtf4_alts"
    [(set (match_operand:TF 0 "fr_register_operand" "=f")
+ 	(plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+ 			  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
+ 		 (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))
+    (use (match_operand:SI 4 "const_int_operand" ""))]
+   ""
+   "fma.s%4 %0 = %F1, %F2, %F3%B0"
+   [(set_attr "type" "F")])
+ 
+ (define_insn "*msubtf4"
+   [(set (match_operand:TF 0 "fr_register_operand" "=f")
  	(minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
  			   (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))
  		  (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))]
***************
*** 2039,2045 ****
  
  ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
  
! (define_insn "*nmaddtf3"
    [(set (match_operand:TF 0 "fr_register_operand" "=f")
  	(plus:TF (neg:TF (mult:TF
  			  (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
--- 2462,2468 ----
  
  ;; ??? Is it possible to canonicalize this as (minus (reg) (mult))?
  
! (define_insn "*nmaddtf4"
    [(set (match_operand:TF 0 "fr_register_operand" "=f")
  	(plus:TF (neg:TF (mult:TF
  			  (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
***************
*** 2047,2052 ****
--- 2470,2498 ----
  		 (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))]
    ""
    "fnma %0 = %F1, %F2, %F3%B0"
+   [(set_attr "type" "F")])
+ 
+ (define_insn "*nmaddtf4_alts"
+   [(set (match_operand:TF 0 "fr_register_operand" "=f")
+ 	(plus:TF (neg:TF (mult:TF
+ 			  (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")
+ 			  (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))
+ 		 (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))
+    (use (match_operand:SI 4 "const_int_operand" ""))]
+   ""
+   "fnma.s%4 %0 = %F1, %F2, %F3%B0"
+   [(set_attr "type" "F")])
+ 
+ (define_insn "*recip_approx"
+   [(set (match_operand:TF 0 "fr_register_operand" "=f")
+ 	(div:TF (const_int 1)
+ 		(match_operand:TF 3 "fr_register_operand" "f")))
+    (set (match_operand:CC 1 "register_operand" "=c")
+ 	(unspec:CC [(match_operand:TF 2 "fr_register_operand" "f")
+ 		    (match_dup 3)] 5))
+    (use (match_operand:SI 4 "const_int_operand" ""))]
+   ""
+   "frcpa.s%4 %0, %1 = %2, %3"
    [(set_attr "type" "F")])
  
  ;; ::::::::::::::::::::
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]