This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, committed] PowerPC floating point division approximation


	This patch implements an experimental feature that computes floating
point division by using the PowerPC floating point reciprocal estimate
instruction and iteratively refining the estimate to achieve the desired
accuracy.  This method allows greater FP throughput by interleaving the
computation with other FP operations.

David


	* config/rs6000/rs6000.c (rs6000_emit_swdivsf): New function.
	(rs6000_emit_swdivdf): New function.
	* config/rs6000/rs6000.md (fres): New pattern.
	(divsf3): Add approximation through rs6000_emit_swdivsf.
	(fred): New pattern.
	(divdf3): Add approximation through rs6000_emit_swdivdf.
	* config/rs6000/rs6000-protos.h (rs6000_emit_swdivsf): Declare.
	(rs6000_emit_swdivdf): Declare.
	* config/rs6000/rs6000.opt (mswdiv): New option.
	* doc/invoke.texi (RS/6000 and PowerPC Options): Document mswdiv.

Index: doc/invoke.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/invoke.texi,v
retrieving revision 1.631
diff -c -p -r1.631 invoke.texi
*** doc/invoke.texi	2 Jun 2005 17:13:26 -0000	1.631
--- doc/invoke.texi	6 Jun 2005 00:50:39 -0000
*************** See RS/6000 and PowerPC Options.
*** 631,637 ****
  -mstrict-align  -mno-strict-align  -mrelocatable @gol
  -mno-relocatable  -mrelocatable-lib  -mno-relocatable-lib @gol
  -mtoc  -mno-toc  -mlittle  -mlittle-endian  -mbig  -mbig-endian @gol
! -mdynamic-no-pic @gol
  -mprioritize-restricted-insns=@var{priority} @gol
  -msched-costly-dep=@var{dependence_type} @gol
  -minsert-sched-nops=@var{scheme} @gol
--- 631,637 ----
  -mstrict-align  -mno-strict-align  -mrelocatable @gol
  -mno-relocatable  -mrelocatable-lib  -mno-relocatable-lib @gol
  -mtoc  -mno-toc  -mlittle  -mlittle-endian  -mbig  -mbig-endian @gol
! -mdynamic-no-pic  -maltivec  -mswdiv @gol
  -mprioritize-restricted-insns=@var{priority} @gol
  -msched-costly-dep=@var{dependence_type} @gol
  -minsert-sched-nops=@var{scheme} @gol
*************** values for @var{cpu_type} are used for @
*** 10720,10725 ****
--- 10720,10736 ----
  architecture, registers, and mnemonics set by @option{-mcpu}, but the
  scheduling parameters set by @option{-mtune}.
  
+ @item -mswdiv
+ @itemx -mno-swdiv
+ @opindex mswdiv
+ @opindex mno-swdiv
+ Generate code to compute division as reciprocal estimate and iterative
+ refinement, creating opportunities for increased throughput.  This
+ feature requires: optional PowerPC Graphics instruction set for single
+ precision and FRE instruction for double precision, assuming divides
+ cannot generate user-visible traps, and the domain values not including
+ Infinities, denormals or zero denominator.
+ 
  @item -maltivec
  @itemx -mno-altivec
  @opindex maltivec
Index: config/rs6000/rs6000-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000-protos.h,v
retrieving revision 1.99
diff -c -p -r1.99 rs6000-protos.h
*** config/rs6000/rs6000-protos.h	21 Apr 2005 21:13:36 -0000	1.99
--- config/rs6000/rs6000-protos.h	6 Jun 2005 00:50:39 -0000
*************** extern int rs6000_emit_vector_cond_expr 
*** 82,87 ****
--- 82,89 ----
  extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
  extern void rs6000_emit_sync (enum rtx_code, enum machine_mode,
  			      rtx, rtx, rtx, rtx, bool);
+ extern void rs6000_emit_swdivsf (rtx, rtx, rtx);
+ extern void rs6000_emit_swdivdf (rtx, rtx, rtx);
  extern void output_toc (FILE *, rtx, int, enum machine_mode);
  extern void rs6000_initialize_trampoline (rtx, rtx, rtx);
  extern rtx rs6000_longcall_ref (rtx);
Index: config/rs6000/rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.832
diff -c -p -r1.832 rs6000.c
*** config/rs6000/rs6000.c	4 Jun 2005 21:32:34 -0000	1.832
--- config/rs6000/rs6000.c	6 Jun 2005 00:50:39 -0000
*************** rs6000_memory_move_cost (enum machine_mo
*** 17955,17960 ****
--- 17955,18063 ----
      return 4 + rs6000_register_move_cost (mode, class, GENERAL_REGS);
  }
  
+ /* Emit RTL for RES = N / D in SFmode: refine the reciprocal estimate of D by
+    Newton-Raphson steps.  Assumes no trapping math and finite arguments.  */
+ 
+ void
+ rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
+ {
+   rtx x0, e0, e1, y1, u0, v0, one;
+ 
+   x0 = gen_reg_rtx (SFmode);
+   e0 = gen_reg_rtx (SFmode);
+   e1 = gen_reg_rtx (SFmode);
+   y1 = gen_reg_rtx (SFmode);
+   u0 = gen_reg_rtx (SFmode);
+   v0 = gen_reg_rtx (SFmode);
+   one = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, SFmode));
+ 
+   /* x0 = 1./d estimate, expressed as an UNSPEC_FRES of d */
+   emit_insn (gen_rtx_SET (VOIDmode, x0,
+ 			  gen_rtx_UNSPEC (SFmode, gen_rtvec (1, d),
+ 					  UNSPEC_FRES)));
+   /* e0 = 1. - d * x0, the relative error of the estimate */
+   emit_insn (gen_rtx_SET (VOIDmode, e0,
+ 			  gen_rtx_MINUS (SFmode, one,
+ 					 gen_rtx_MULT (SFmode, d, x0))));
+   /* e1 = e0 + e0 * e0, first- plus second-order correction term */
+   emit_insn (gen_rtx_SET (VOIDmode, e1,
+ 			  gen_rtx_PLUS (SFmode,
+ 					gen_rtx_MULT (SFmode, e0, e0), e0)));
+   /* y1 = x0 + e1 * x0, the refined reciprocal */
+   emit_insn (gen_rtx_SET (VOIDmode, y1,
+ 			  gen_rtx_PLUS (SFmode,
+ 					gen_rtx_MULT (SFmode, e1, x0), x0)));
+   /* u0 = n * y1, the approximate quotient */
+   emit_insn (gen_rtx_SET (VOIDmode, u0,
+ 			  gen_rtx_MULT (SFmode, n, y1)));
+   /* v0 = n - d * u0, the residual left by u0 */
+   emit_insn (gen_rtx_SET (VOIDmode, v0,
+ 			  gen_rtx_MINUS (SFmode, n,
+ 					 gen_rtx_MULT (SFmode, d, u0))));
+   /* res = u0 + v0 * y1, the quotient corrected by the residual */
+   emit_insn (gen_rtx_SET (VOIDmode, res,
+ 			  gen_rtx_PLUS (SFmode,
+ 					gen_rtx_MULT (SFmode, v0, y1), u0)));
+ }
+ 
+ /* Emit RTL for RES = N / D in DFmode: refine the reciprocal estimate of D by
+    Newton-Raphson steps.  Assumes no trapping math and finite arguments.  */
+ 
+ void
+ rs6000_emit_swdivdf (rtx res, rtx n, rtx d)
+ {
+   rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
+ 
+   x0 = gen_reg_rtx (DFmode);
+   e0 = gen_reg_rtx (DFmode);
+   e1 = gen_reg_rtx (DFmode);
+   e2 = gen_reg_rtx (DFmode);
+   y1 = gen_reg_rtx (DFmode);
+   y2 = gen_reg_rtx (DFmode);
+   y3 = gen_reg_rtx (DFmode);
+   u0 = gen_reg_rtx (DFmode);
+   v0 = gen_reg_rtx (DFmode);
+   one = force_reg (DFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, DFmode));
+ 
+   /* x0 = 1./d estimate, expressed as an UNSPEC_FRES of d */
+   emit_insn (gen_rtx_SET (VOIDmode, x0,
+ 			  gen_rtx_UNSPEC (DFmode, gen_rtvec (1, d),
+ 					  UNSPEC_FRES)));
+   /* e0 = 1. - d * x0; the product must be DFmode like its operands */
+   emit_insn (gen_rtx_SET (VOIDmode, e0,
+ 			  gen_rtx_MINUS (DFmode, one,
+ 					 gen_rtx_MULT (DFmode, d, x0))));
+   /* y1 = x0 + e0 * x0, first refinement of the reciprocal */
+   emit_insn (gen_rtx_SET (VOIDmode, y1,
+ 			  gen_rtx_PLUS (DFmode,
+ 					gen_rtx_MULT (DFmode, e0, x0), x0)));
+   /* e1 = e0 * e0, the squared error term */
+   emit_insn (gen_rtx_SET (VOIDmode, e1,
+ 			  gen_rtx_MULT (DFmode, e0, e0)));
+   /* y2 = y1 + e1 * y1, second refinement */
+   emit_insn (gen_rtx_SET (VOIDmode, y2,
+ 			  gen_rtx_PLUS (DFmode,
+ 					gen_rtx_MULT (DFmode, e1, y1), y1)));
+   /* e2 = e1 * e1, the error term squared again */
+   emit_insn (gen_rtx_SET (VOIDmode, e2,
+ 			  gen_rtx_MULT (DFmode, e1, e1)));
+   /* y3 = y2 + e2 * y2, third refinement */
+   emit_insn (gen_rtx_SET (VOIDmode, y3,
+ 			  gen_rtx_PLUS (DFmode,
+ 					gen_rtx_MULT (DFmode, e2, y2), y2)));
+   /* u0 = n * y3, the approximate quotient */
+   emit_insn (gen_rtx_SET (VOIDmode, u0,
+ 			  gen_rtx_MULT (DFmode, n, y3)));
+   /* v0 = n - d * u0, the residual left by u0 */
+   emit_insn (gen_rtx_SET (VOIDmode, v0,
+ 			  gen_rtx_MINUS (DFmode, n,
+ 					 gen_rtx_MULT (DFmode, d, u0))));
+   /* res = u0 + v0 * y3, the quotient corrected by the residual */
+   emit_insn (gen_rtx_SET (VOIDmode, res,
+ 			  gen_rtx_PLUS (DFmode,
+ 					gen_rtx_MULT (DFmode, v0, y3), u0)));
+ }
+ 
  /* Return an RTX representing where to find the function value of a
     function returning MODE.  */
  static rtx
Index: config/rs6000/rs6000.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.md,v
retrieving revision 1.374
diff -c -p -r1.374 rs6000.md
*** config/rs6000/rs6000.md	4 Jun 2005 21:32:40 -0000	1.374
--- config/rs6000/rs6000.md	6 Jun 2005 00:50:40 -0000
***************
*** 58,63 ****
--- 58,64 ----
     (UNSPEC_LWSYNC		36)
     (UNSPEC_ISYNC		37)
     (UNSPEC_POPCNTB		38)
+    (UNSPEC_FRES			39)
    ])
  
  ;;
***************
*** 4640,4651 ****
    "{fm|fmul} %0,%1,%2"
    [(set_attr "type" "dmul")])
  
  (define_expand "divsf3"
    [(set (match_operand:SF 0 "gpc_reg_operand" "")
  	(div:SF (match_operand:SF 1 "gpc_reg_operand" "")
  		(match_operand:SF 2 "gpc_reg_operand" "")))]
    "TARGET_HARD_FLOAT"
!   "")
  
  (define_insn ""
    [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
--- 4641,4666 ----
    "{fm|fmul} %0,%1,%2"
    [(set_attr "type" "dmul")])
  
+ (define_insn "fres"
+   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ 	(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+   "TARGET_PPC_GFXOPT && flag_finite_math_only"
+   "fres %0,%1"
+   [(set_attr "type" "fp")])
+ 
  (define_expand "divsf3"
    [(set (match_operand:SF 0 "gpc_reg_operand" "")
  	(div:SF (match_operand:SF 1 "gpc_reg_operand" "")
  		(match_operand:SF 2 "gpc_reg_operand" "")))]
    "TARGET_HARD_FLOAT"
! {
!   if (swdiv && !optimize_size && TARGET_PPC_GFXOPT
!   && flag_finite_math_only && !flag_trapping_math)
!     {
!       rs6000_emit_swdivsf (operands[0], operands[1], operands[2]);
!       DONE;
!     }
! })
  
  (define_insn ""
    [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
***************
*** 5028,5039 ****
    "{fm|fmul} %0,%1,%2"
    [(set_attr "type" "dmul")])
  
  (define_expand "divdf3"
    [(set (match_operand:DF 0 "gpc_reg_operand" "")
  	(div:DF (match_operand:DF 1 "gpc_reg_operand" "")
  		(match_operand:DF 2 "gpc_reg_operand" "")))]
    "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)"
!   "")
  
  (define_insn "*divdf3_fpr"
    [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
--- 5043,5068 ----
    "{fm|fmul} %0,%1,%2"
    [(set_attr "type" "dmul")])
  
+ (define_insn "fred"
+   [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ 	(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+   "TARGET_POPCNTB && flag_finite_math_only"
+   "fre %0,%1"
+   [(set_attr "type" "fp")])
+ 
  (define_expand "divdf3"
    [(set (match_operand:DF 0 "gpc_reg_operand" "")
  	(div:DF (match_operand:DF 1 "gpc_reg_operand" "")
  		(match_operand:DF 2 "gpc_reg_operand" "")))]
    "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)"
! {
!   if (swdiv && !optimize_size && TARGET_POPCNTB
!   && flag_finite_math_only && !flag_trapping_math)
!     {
!       rs6000_emit_swdivdf (operands[0], operands[1], operands[2]);
!       DONE;
!     }
! })
  
  (define_insn "*divdf3_fpr"
    [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
Index: config/rs6000/rs6000.opt
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.opt,v
retrieving revision 1.4
diff -c -p -r1.4 rs6000.opt
*** config/rs6000/rs6000.opt	25 May 2005 22:19:22 -0000	1.4
--- config/rs6000/rs6000.opt	6 Jun 2005 00:50:40 -0000
*************** mxl-compat
*** 123,128 ****
--- 123,132 ----
  Target Report Var(TARGET_XL_COMPAT)
  Conform more closely to IBM XLC semantics
  
+ mswdiv
+ Target Report Var(swdiv)
+ Generate software floating point divide for better throughput
+ 
  mno-fp-in-toc
  Target Report RejectNegative Mask(NO_FP_IN_TOC)
  Do not place floating point constants in TOC


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]