
[PATCH][GCC] Complex division improvements in GCC


Hi all,

Advice and help needed! 

This patch changes the complex division in GCC. The algorithm used is the
same as in https://gcc.gnu.org/ml/gcc-patches/2019-08/msg01629.html - the
same problems, the same improvement in robustness and the same loss in
accuracy.

Since Baudin's algorithm adds another underflow check, two more basic blocks
get added during the cplxlower1 pass.
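
To make it easier to follow what the lowered code computes, here is a rough
scalar sketch in C - my reading of Baudin's variant of Smith's algorithm,
with illustrative names (cdiv, cdiv_sub) that do not appear in the patch
itself:

#include <math.h>

/* Core step, assuming |d| <= |c|.  */
static void
cdiv_sub (double a, double b, double c, double d, double *e, double *f)
{
  double r = d / c;
  double t = 1.0 / (c + d * r);

  if (r != 0.0)
    {
      *e = (a + b * r) * t;
      *f = (b - a * r) * t;
    }
  else
    {
      /* r underflowed to zero: divide by c first instead of
	 multiplying by r.  This is the extra check that creates
	 the two additional basic blocks.  */
      *e = (a + d * (b / c)) * t;
      *f = (b - d * (a / c)) * t;
    }
}

/* e + i*f = (a + i*b) / (c + i*d).  */
static void
cdiv (double a, double b, double c, double d, double *e, double *f)
{
  if (fabs (d) <= fabs (c))
    cdiv_sub (a, b, c, d, e, f);
  else
    {
      /* Swap the real and imaginary parts of the divisor and negate
	 the imaginary part of the result.  */
      cdiv_sub (b, a, d, c, e, f);
      *f = -*f;
    }
}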

There were no problems with bootstrap on aarch64-none-linux-gnu.
Unsurprisingly, there are regressions in
gcc/testsuite/gcc.dg/torture/builtin-math-7.c; as in the patch linked above,
the regressions in that test are due to the loss in accuracy.

To evaluate the performance, the same test as before, which generates
360 000 000 random numbers, was used. Doing one division less results in a
nice 11.32% improvement in performance:

algorithm | CPU time
----------+-----------
smiths    | 7 290 996
b1div     | 6 465 590
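
(The benchmark has roughly the following shape - a hypothetical sketch
rather than the actual harness, with an illustrative iteration count:)

#include <stdlib.h>
#include <complex.h>

int
main (void)
{
  /* 90 000 000 iterations x 4 calls to drand48 () gives the
     360 000 000 random numbers mentioned above.  */
  long n = 90000000;
  double complex acc = 0.0;

  for (long i = 0; i < n; i++)
    {
      double complex x = drand48 () + drand48 () * I;
      double complex y = drand48 () + drand48 () * I;
      acc += x / y;	/* the division being measured */
    }

  return acc == 0.0;	/* keep the computation live */
}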

That implementation works (in the sense that it produces the expected
results), but it could be made more efficient and cleaner. As an example, the
cplxlower1 pass assigns one variable to another variable, which seems
redundant:

[...]

 <bb 3> [local count: 1063004407]:
  # i_19 = PHI <0(2), i_15(7)>
  _9 = REALPART_EXPR <x[i_19]>;
  _7 = IMAGPART_EXPR <x[i_19]>;
  _1 = COMPLEX_EXPR <_9, _7>;
  _18 = REALPART_EXPR <y[i_19]>;
  _17 = IMAGPART_EXPR <y[i_19]>;
  _2 = COMPLEX_EXPR <_18, _17>;
  _16 = ABS_EXPR <_18>;
  _21 = ABS_EXPR <_17>;
  _22 = _16 > _21;
  if (_22 != 0)
    goto <bb 9>; [50.00%]
  else
    goto <bb 10>; [50.00%]

  <bb 9> [local count: 531502204]:
  _23 = _17 / _18;
  _24 = _17 * _23;
  _25 = _18 + _24;
  _26 = 1.0e+0 / _25;
  _27 = _23 == 0.0;
  if (_27 != 0)
    goto <bb 12>; [50.00%]
  else
    goto <bb 13>; [50.00%]

  <bb 12> [local count: 265751102]:
  _28 = _7 / _18;
  _29 = _17 * _28;
  _30 = _9 + _29;
  _31 = _26 * _30;
  _32 = _9 / _18;
  _33 = _17 * _32;
  _34 = _7 - _33;
  _35 = _26 * _34;
  _83 = _31; <----------------------- could these extra assignments be avoided?
  _84 = _35; <-----------------------|
  goto <bb 11>; [100.00%]

  <bb 13> [local count: 265751102]:
  _36 = _7 * _23;
  _37 = _9 + _36;
  _38 = _26 * _37;
  _39 = _9 * _23;
  _40 = _7 - _39;
  _41 = _26 * _40;
  _81 = _38;
  _82 = _41;

  <bb 11> [local count: 531502204]:
  # _71 = PHI <_83(12), _81(13)>
  # _72 = PHI <_84(12), _82(13)>
  _85 = _71;
  _86 = _72;
  goto <bb 8>; [100.00%]

  <bb 10> [local count: 531502204]:
  _42 = _18 / _17;
  _43 = _18 * _42;
  _44 = _17 + _43;
  _45 = 1.0e+0 / _44;
  _46 = _42 == 0.0;
  if (_46 != 0)
    goto <bb 15>; [50.00%]
  else
    goto <bb 16>; [50.00%]

  <bb 15> [local count: 265751102]:
  _47 = _9 / _17;
  _48 = _18 * _47;
  _49 = _7 + _48;
  _50 = _45 * _49;
  _51 = _7 / _17;
  _52 = _18 * _51;
  _53 = _9 - _52;
  _54 = _45 * _53;
  _77 = _50;
  _78 = _54;
  goto <bb 14>; [100.00%]

  <bb 16> [local count: 265751102]:
  _55 = _9 * _42;
  _56 = _7 + _55;
  _57 = _45 * _56;
  _58 = _7 * _42;
  _59 = _9 - _58;
  _60 = _45 * _59;
  _75 = _57;
  _76 = _60;

  <bb 14> [local count: 531502204]:
  # _73 = PHI <_77(15), _75(16)>
  # _74 = PHI <_78(15), _76(16)>
  _61 = -_74;
  _79 = _73;
  _80 = _61;

[...]

Best wishes,
Elen

gcc/ChangeLog:

2019-08-29  Elen Kalda  <elen.kalda@arm.com>

    * fold-const.c (fold_negate_const): Make the function non-static.
    (const_binop): Implement Baudin's algorithm for complex division.
    * fold-const.h (fold_negate_const): Add function declaration.
    * tree-complex.c (complex_div_internal_wide): New function to aid with
    the wide complex division.
    (expand_complex_div_wide): Implement Baudin's algorithm for complex
    division.
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index 54c850a3ee1f5db7c20fc8ab07ea504d634b55b8..71c1631881b693f973fa9ef94154abb02064e1c1 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -194,6 +194,7 @@ extern tree const_binop (enum tree_code, tree, tree, tree);
 extern bool negate_mathfn_p (combined_fn);
 extern const char *c_getstr (tree, unsigned HOST_WIDE_INT * = NULL);
 extern wide_int tree_nonzero_bits (const_tree);
+extern tree fold_negate_const (tree arg0, tree type);
 
 /* Return OFF converted to a pointer offset type suitable as offset for
    POINTER_PLUS_EXPR.  Use location LOC for this conversion.  */
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 0bd68b5e2d484d6f3be52b1d38be5a9f41637355..e4ea9046fbf010861b726dd742fd3834b35e80ec 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -133,7 +133,7 @@ static tree fold_binary_op_with_conditional_arg (location_t,
 						 enum tree_code, tree,
 						 tree, tree,
 						 tree, tree, int);
-static tree fold_negate_const (tree, tree);
+tree fold_negate_const (tree, tree);
 static tree fold_not_const (const_tree, tree);
 static tree fold_relational_const (enum tree_code, tree, tree, tree);
 static tree fold_convert_const (enum tree_code, tree, tree);
@@ -1387,7 +1387,9 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
       tree i1 = TREE_IMAGPART (arg1);
       tree r2 = TREE_REALPART (arg2);
       tree i2 = TREE_IMAGPART (arg2);
+
       tree real, imag;
+      imag = real = NULL_TREE;
 
       switch (code)
 	{
@@ -1446,58 +1448,105 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
 	    real = const_binop (code, t1, magsquared);
 	    imag = const_binop (code, t2, magsquared);
 	  }
+
+	  /* Keep this algorithm in sync with
+	     tree-complex.c:expand_complex_div_wide().
+
+	     Expand complex division to scalars, modified algorithm to
+	     minimize overflow with wide input ranges.
+
+	     z = x / y
+	     r1 = real (x)
+	     i1 = imag (x)
+	     r2 = real (y)
+	     i2 = imag (y)
+	     real = real (z)
+	     imag = imag (z)  */
 	  else
 	  {
-	    /* Keep this algorithm in sync with
-               tree-complex.c:expand_complex_div_wide().
-
-	       Expand complex division to scalars, modified algorithm to minimize
-	       overflow with wide input ranges.  */
 	    tree compare = fold_build2 (LT_EXPR, boolean_type_node,
 					fold_abs_const (r2, TREE_TYPE (type)),
 					fold_abs_const (i2, TREE_TYPE (type)));
 
-	    if (integer_nonzerop (compare))
+	    /* If |r2| >= |i2|.  */
+	    if (!integer_nonzerop (compare))
+	    {
+	      tree ratio = const_binop (code, i2, r2);
+
+	      if (!ratio)
+		break;
+
+	      tree div = const_binop (PLUS_EXPR, r2,
+				      const_binop (MULT_EXPR, i2, ratio));
+	      tree t = const_binop (code, build_one_cst (TREE_TYPE (div)),
+				    div);
+
+	      if (!zerop (ratio))
+	      {
+		real = const_binop (MULT_EXPR, i1, ratio);
+		real = const_binop (PLUS_EXPR, real, r1);
+		real = const_binop (MULT_EXPR, real, t);
+
+		imag = const_binop (MULT_EXPR, r1, ratio);
+		imag = const_binop (MINUS_EXPR, i1, imag);
+		imag = const_binop (MULT_EXPR, imag, t);
+	      }
+
+	      /* If ratio underflowed to zero.  */
+	      else
+	      {
+		tree div_i1_r2 = const_binop (code, i1, r2);
+
+		real = const_binop (MULT_EXPR, i2, div_i1_r2);
+		real = const_binop (PLUS_EXPR, real, r1);
+		real = const_binop (MULT_EXPR, real, t);
+
+		tree div_r1_r2 = const_binop (code, r1, r2);
+
+		imag = const_binop (MULT_EXPR, i2, div_r1_r2);
+		imag = const_binop (MINUS_EXPR, i1, imag);
+		imag = const_binop (MULT_EXPR, imag, t);
+	      }
+	    }
+
+	    else
+	    {
+	      tree ratio = const_binop (code, r2, i2);
+
+	      if (!ratio)
+		break;
+
+	      tree div = const_binop (PLUS_EXPR, i2,
+				      const_binop (MULT_EXPR, r2, ratio));
+	      tree t = const_binop (code, build_one_cst (TREE_TYPE (div)),
+				    div);
+
+	      if (!zerop (ratio))
 	      {
-		/* In the TRUE branch, we compute
-		   ratio = br/bi;
-		   div = (br * ratio) + bi;
-		   tr = (ar * ratio) + ai;
-		   ti = (ai * ratio) - ar;
-		   tr = tr / div;
-		   ti = ti / div;  */
-		tree ratio = const_binop (code, r2, i2);
-		tree div = const_binop (PLUS_EXPR, i2,
-					const_binop (MULT_EXPR, r2, ratio));
 		real = const_binop (MULT_EXPR, r1, ratio);
 		real = const_binop (PLUS_EXPR, real, i1);
-		real = const_binop (code, real, div);
+		real = const_binop (MULT_EXPR, real, t);
 
 		imag = const_binop (MULT_EXPR, i1, ratio);
 		imag = const_binop (MINUS_EXPR, imag, r1);
-		imag = const_binop (code, imag, div);
+		imag = const_binop (MULT_EXPR, imag, t);
 	      }
-	    else
+
+	      else
 	      {
-		/* In the FALSE branch, we compute
-		   ratio = d/c;
-		   divisor = (d * ratio) + c;
-		   tr = (b * ratio) + a;
-		   ti = b - (a * ratio);
-		   tr = tr / div;
-		   ti = ti / div;  */
-		tree ratio = const_binop (code, i2, r2);
-		tree div = const_binop (PLUS_EXPR, r2,
-                                        const_binop (MULT_EXPR, i2, ratio));
+		tree div_r1_i2 = const_binop (code, r1, i2);
 
-		real = const_binop (MULT_EXPR, i1, ratio);
-		real = const_binop (PLUS_EXPR, real, r1);
-		real = const_binop (code, real, div);
+		real = const_binop (MULT_EXPR, r2, div_r1_i2);
+		real = const_binop (PLUS_EXPR, real, i1);
+		real = const_binop (MULT_EXPR, real, t);
 
-		imag = const_binop (MULT_EXPR, r1, ratio);
-		imag = const_binop (MINUS_EXPR, i1, imag);
-		imag = const_binop (code, imag, div);
+		tree div_i1_i2 = const_binop (code, i1, i2);
+
+		imag = const_binop (MULT_EXPR, r2, div_i1_i2);
+		imag = const_binop (MINUS_EXPR, imag, r1);
+		imag = const_binop (MULT_EXPR, imag, t);
 	      }
+	    }
 	  }
 	  break;
 
@@ -13874,7 +13923,7 @@ fold_read_from_vector (tree arg, poly_uint64 idx)
 
    TYPE is the type of the result.  */
 
-static tree
+tree
 fold_negate_const (tree arg0, tree type)
 {
   tree t = NULL_TREE;
diff --git a/gcc/tree-complex.c b/gcc/tree-complex.c
index d4b053d68e1396a98760060c4a9cae448635d1a5..106ef8dfe14ca20a5dc7f0d19d24573d88a12179 100644
--- a/gcc/tree-complex.c
+++ b/gcc/tree-complex.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-hasher.h"
 #include "cfgloop.h"
 #include "cfganal.h"
+#include "fold-const.h"
 
 
 /* For each complex ssa name, a lattice value.  We're interested in finding
@@ -1244,162 +1245,300 @@ expand_complex_div_straight (gimple_stmt_iterator *gsi, tree inner_type,
   update_complex_assignment (gsi, rr, ri);
 }
 
-/* Keep this algorithm in sync with fold-const.c:const_binop().
-
-   Expand complex division to scalars, modified algorithm to minimize
-   overflow with wide input ranges.  */
+/* Helper for expand_complex_div_wide.  Computes z = x / y, where
+
+   r1 = real (x)
+   i1 = imag (x)
+   r2 = real (y)
+   i2 = imag (y)
+   real = real (z), imag = imag (z)  */
 
 static void
-expand_complex_div_wide (gimple_stmt_iterator *gsi, tree inner_type,
-			 tree ar, tree ai, tree br, tree bi,
-			 enum tree_code code)
+complex_div_internal_wide (gimple_stmt_iterator *gsi, tree *real, tree *imag,
+			   tree r1, tree i1, tree r2, tree i2,
+			   tree inner_type, enum tree_code code)
 {
-  tree rr, ri, ratio, div, t1, t2, tr, ti, compare;
+  tree ratio, t, div, underflow, div_i1_r2, div_r1_r2, temp_real, temp_imag,
+		real_loc, imag_loc;
   basic_block bb_cond, bb_true, bb_false, bb_join;
   gimple *stmt;
 
-  /* Examine |br| < |bi|, and branch.  */
-  t1 = gimplify_build1 (gsi, ABS_EXPR, inner_type, br);
-  t2 = gimplify_build1 (gsi, ABS_EXPR, inner_type, bi);
-  compare = fold_build2_loc (gimple_location (gsi_stmt (*gsi)),
-			     LT_EXPR, boolean_type_node, t1, t2);
-  STRIP_NOPS (compare);
+  ratio = gimplify_build2 (gsi, code, inner_type, i2, r2);
+  div = gimplify_build2 (gsi, PLUS_EXPR, inner_type,
+		r2, gimplify_build2 (gsi, MULT_EXPR, inner_type, i2, ratio));
+  t = gimplify_build2 (gsi, code, inner_type, build_one_cst (TREE_TYPE (div)),
+		div);
+
+  /* Check if ratio == 0.  */
+  underflow = fold_build2_loc (gimple_location (gsi_stmt (*gsi)), EQ_EXPR,
+		boolean_type_node, ratio, build_zero_cst (TREE_TYPE (ratio)));
+
+  STRIP_NOPS (underflow);
 
   bb_cond = bb_true = bb_false = bb_join = NULL;
-  rr = ri = tr = ti = NULL;
-  if (TREE_CODE (compare) != INTEGER_CST)
+  div_i1_r2 = div_r1_r2 = temp_real = temp_imag = real_loc = imag_loc = NULL;
+
+  if (TREE_CODE (underflow) != INTEGER_CST)
+  {
+    edge e;
+    gimple *stmt;
+    tree cond, tmp;
+
+    tmp = make_ssa_name (boolean_type_node);
+    stmt = gimple_build_assign (tmp, underflow);
+    gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+
+    cond = fold_build2_loc (gimple_location (stmt),
+		EQ_EXPR, boolean_type_node, tmp, boolean_true_node);
+    stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
+    gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+
+    /* Split the original block, and create the TRUE and FALSE blocks.  */
+    e = split_block (gsi_bb (*gsi), stmt);
+    bb_cond = e->src;
+    bb_join = e->dest;
+    bb_true = create_empty_bb (bb_cond);
+    bb_false = create_empty_bb (bb_true);
+    bb_true->count = bb_false->count
+	= bb_cond->count.apply_probability (profile_probability::even ());
+
+    /* Wire the blocks together.  */
+    e->flags = EDGE_TRUE_VALUE;
+    /* TODO: With value profile we could add a histogram to determine real
+    branch outcome.  */
+    e->probability = profile_probability::even ();
+    redirect_edge_succ (e, bb_true);
+    edge e2 = make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
+    e2->probability = profile_probability::even ();
+    make_single_succ_edge (bb_true, bb_join, EDGE_FALLTHRU);
+    make_single_succ_edge (bb_false, bb_join, EDGE_FALLTHRU);
+    add_bb_to_loop (bb_true, bb_cond->loop_father);
+    add_bb_to_loop (bb_false, bb_cond->loop_father);
+
+    /* Update dominance info.  Note that bb_join's data was
+    updated by split_block.  */
+    if (dom_info_available_p (CDI_DOMINATORS))
     {
-      edge e;
-      gimple *stmt;
-      tree cond, tmp;
+      set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
+      set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
+    }
 
-      tmp = make_ssa_name (boolean_type_node);
-      stmt = gimple_build_assign (tmp, compare);
-      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+    temp_real = create_tmp_reg (inner_type);
+    temp_imag = create_tmp_reg (inner_type);
+  }
 
-      cond = fold_build2_loc (gimple_location (stmt),
-			  EQ_EXPR, boolean_type_node, tmp, boolean_true_node);
-      stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
-      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  /* If ratio underflowed to zero, change the order of operations.  */
+  if (bb_true || integer_nonzerop (underflow))
+  {
+    if (bb_true)
+    {
+      *gsi = gsi_last_bb (bb_true);
+      gsi_insert_after (gsi, gimple_build_nop (), GSI_NEW_STMT);
+    }
 
-      /* Split the original block, and create the TRUE and FALSE blocks.  */
-      e = split_block (gsi_bb (*gsi), stmt);
-      bb_cond = e->src;
-      bb_join = e->dest;
-      bb_true = create_empty_bb (bb_cond);
-      bb_false = create_empty_bb (bb_true);
-      bb_true->count = bb_false->count
-	 = bb_cond->count.apply_probability (profile_probability::even ());
+    /* i1 / r2 */
+    div_i1_r2 = gimplify_build2 (gsi, code, inner_type, i1, r2);
+    /* i2 * div_i1_r2 */
+    real_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, i2, div_i1_r2);
+    /* r1 + (i2 * div_i1_r2) */
+    real_loc = gimplify_build2 (gsi, PLUS_EXPR, inner_type, r1, real_loc);
+    /* real = (r1 + i2 * div_i1_r2) * t */
+    real_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, real_loc, t);
+
+    /* r1 / r2 */
+    div_r1_r2 = gimplify_build2 (gsi, code, inner_type, r1, r2);
+    /* i2 * div_r1_r2 */
+    imag_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, i2, div_r1_r2);
+    /* i1 - (i2 * div_r1_r2) */
+    imag_loc = gimplify_build2 (gsi, MINUS_EXPR, inner_type, i1, imag_loc);
+    /* imag = (i1 - i2 * div_r1_r2) * t */
+    imag_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, imag_loc, t);
+
+    if (bb_true)
+    {
+      stmt = gimple_build_assign (temp_real, real_loc);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      stmt = gimple_build_assign (temp_imag, imag_loc);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
+    }
+  }
 
-      /* Wire the blocks together.  */
-      e->flags = EDGE_TRUE_VALUE;
-      /* TODO: With value profile we could add an historgram to determine real
-	 branch outcome.  */
-      e->probability = profile_probability::even ();
-      redirect_edge_succ (e, bb_true);
-      edge e2 = make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
-      e2->probability = profile_probability::even ();
-      make_single_succ_edge (bb_true, bb_join, EDGE_FALLTHRU);
-      make_single_succ_edge (bb_false, bb_join, EDGE_FALLTHRU);
-      add_bb_to_loop (bb_true, bb_cond->loop_father);
-      add_bb_to_loop (bb_false, bb_cond->loop_father);
-
-      /* Update dominance info.  Note that bb_join's data was
-         updated by split_block.  */
-      if (dom_info_available_p (CDI_DOMINATORS))
-        {
-          set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
-          set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
-        }
-
-      rr = create_tmp_reg (inner_type);
-      ri = create_tmp_reg (inner_type);
+  if (bb_false || integer_zerop (underflow))
+  {
+    if (bb_false)
+    {
+      *gsi = gsi_last_bb (bb_false);
+      gsi_insert_after (gsi, gimple_build_nop (), GSI_NEW_STMT);
     }
 
-  /* In the TRUE branch, we compute
-      ratio = br/bi;
-      div = (br * ratio) + bi;
-      tr = (ar * ratio) + ai;
-      ti = (ai * ratio) - ar;
-      tr = tr / div;
-      ti = ti / div;  */
-  if (bb_true || integer_nonzerop (compare))
+    /* i1 * ratio */
+    real_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, i1, ratio);
+    /* r1 + (i1 * ratio) */
+    real_loc = gimplify_build2 (gsi, PLUS_EXPR, inner_type, r1, real_loc);
+    /* real = (r1 + i1 * ratio) * t */
+    real_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, real_loc, t);
+
+    /* r1 * ratio */
+    imag_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, r1, ratio);
+    /* i1 - (r1 * ratio) */
+    imag_loc = gimplify_build2 (gsi, MINUS_EXPR, inner_type, i1, imag_loc);
+    /* imag = (i1 - r1 * ratio) * t */
+    imag_loc = gimplify_build2 (gsi, MULT_EXPR, inner_type, imag_loc, t);
+
+    if (bb_false)
     {
-      if (bb_true)
-	{
-	  *gsi = gsi_last_bb (bb_true);
-	  gsi_insert_after (gsi, gimple_build_nop (), GSI_NEW_STMT);
-	}
+      stmt = gimple_build_assign (temp_real, real_loc);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      stmt = gimple_build_assign (temp_imag, imag_loc);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
+    }
+  }
 
-      ratio = gimplify_build2 (gsi, code, inner_type, br, bi);
+  if (bb_join)
+  {
+    *gsi = gsi_start_bb (bb_join);
+    *real = temp_real;
+    *imag = temp_imag;
+  }
+
+  else if (TREE_CODE (underflow) == INTEGER_CST)
+  {
+    *real = real_loc;
+    *imag = imag_loc;
+  }
+}
 
-      t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, br, ratio);
-      div = gimplify_build2 (gsi, PLUS_EXPR, inner_type, t1, bi);
 
-      t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ar, ratio);
-      tr = gimplify_build2 (gsi, PLUS_EXPR, inner_type, t1, ai);
+/* Keep this algorithm in sync with fold-const.c:const_binop().
 
-      t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ai, ratio);
-      ti = gimplify_build2 (gsi, MINUS_EXPR, inner_type, t1, ar);
+   Expand complex division to scalars, modified algorithm to minimize
+   overflow with wide input ranges.
+
+   z = x / y
+   r1 := real (x)
+   i1 := imag (x)
+   r2 := real (y)
+   i2 := imag (y)
+   real := real (z)
+   imag := imag (z)  */
+static void
+expand_complex_div_wide (gimple_stmt_iterator *gsi, tree inner_type, tree r1,
+		  tree i1, tree r2, tree i2, enum tree_code code)
+{
+  tree temp_reg_real, temp_reg_imag, abs_r2, abs_i2, real, imag, compare;
+  basic_block bb_cond, bb_true, bb_false, bb_join;
+  gimple *stmt;
 
-      tr = gimplify_build2 (gsi, code, inner_type, tr, div);
-      ti = gimplify_build2 (gsi, code, inner_type, ti, div);
+  /* Examine |i2| < |r2|, and branch.  */
+  abs_r2 = gimplify_build1 (gsi, ABS_EXPR, inner_type, r2);
+  abs_i2 = gimplify_build1 (gsi, ABS_EXPR, inner_type, i2);
+  compare = fold_build2_loc (gimple_location (gsi_stmt (*gsi)),
+		LT_EXPR, boolean_type_node, abs_i2, abs_r2);
+  STRIP_NOPS (compare);
 
-     if (bb_true)
-       {
-	 stmt = gimple_build_assign (rr, tr);
-	 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
-	 stmt = gimple_build_assign (ri, ti);
-	 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
-	 gsi_remove (gsi, true);
-       }
+  bb_cond = bb_true = bb_false = bb_join = NULL;
+  temp_reg_real = temp_reg_imag = real = imag = NULL;
+  if (TREE_CODE (compare) != INTEGER_CST)
+  {
+    edge e;
+    gimple *stmt;
+    tree cond, tmp;
+
+    tmp = make_ssa_name (boolean_type_node);
+    stmt = gimple_build_assign (tmp, compare);
+    gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+
+    cond = fold_build2_loc (gimple_location (stmt),
+		EQ_EXPR, boolean_type_node, tmp, boolean_true_node);
+    stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
+    gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+
+    /* Split the original block, and create the TRUE and FALSE blocks.  */
+    e = split_block (gsi_bb (*gsi), stmt);
+    bb_cond = e->src;
+    bb_join = e->dest;
+    bb_true = create_empty_bb (bb_cond);
+    bb_false = create_empty_bb (bb_true);
+    bb_true->count = bb_false->count
+	 = bb_cond->count.apply_probability (profile_probability::even ());
+
+    /* Wire the blocks together.  */
+    e->flags = EDGE_TRUE_VALUE;
+    /* TODO: With value profile we could add a histogram to determine real
+	     branch outcome.  */
+    e->probability = profile_probability::even ();
+    redirect_edge_succ (e, bb_true);
+    edge e2 = make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
+    e2->probability = profile_probability::even ();
+    make_single_succ_edge (bb_true, bb_join, EDGE_FALLTHRU);
+    make_single_succ_edge (bb_false, bb_join, EDGE_FALLTHRU);
+    add_bb_to_loop (bb_true, bb_cond->loop_father);
+    add_bb_to_loop (bb_false, bb_cond->loop_father);
+
+    /* Update dominance info.  Note that bb_join's data was
+       updated by split_block.  */
+    if (dom_info_available_p (CDI_DOMINATORS))
+    {
+      set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
+      set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
     }
 
-  /* In the FALSE branch, we compute
-      ratio = d/c;
-      divisor = (d * ratio) + c;
-      tr = (b * ratio) + a;
-      ti = b - (a * ratio);
-      tr = tr / div;
-      ti = ti / div;  */
-  if (bb_false || integer_zerop (compare))
+    temp_reg_real = create_tmp_reg (inner_type);
+    temp_reg_imag = create_tmp_reg (inner_type);
+  }
+
+  if (bb_true || integer_nonzerop (compare))
+  {
+    if (bb_true)
     {
-      if (bb_false)
-	{
-	  *gsi = gsi_last_bb (bb_false);
-	  gsi_insert_after (gsi, gimple_build_nop (), GSI_NEW_STMT);
-	}
+      *gsi = gsi_last_bb (bb_true);
+      gsi_insert_after (gsi, gimple_build_nop (), GSI_NEW_STMT);
+    }
 
-      ratio = gimplify_build2 (gsi, code, inner_type, bi, br);
+    complex_div_internal_wide (gsi, &real, &imag, r1, i1, r2, i2, inner_type,
+		code);
 
-      t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, bi, ratio);
-      div = gimplify_build2 (gsi, PLUS_EXPR, inner_type, t1, br);
+    if (bb_true)
+    {
+      stmt = gimple_build_assign (temp_reg_real, real);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      stmt = gimple_build_assign (temp_reg_imag, imag);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
+    }
+  }
 
-      t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ai, ratio);
-      tr = gimplify_build2 (gsi, PLUS_EXPR, inner_type, t1, ar);
 
-      t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ar, ratio);
-      ti = gimplify_build2 (gsi, MINUS_EXPR, inner_type, ai, t1);
+  if (bb_false || integer_zerop (compare))
+  {
+    if (bb_false)
+    {
+      *gsi = gsi_last_bb (bb_false);
+      gsi_insert_after (gsi, gimple_build_nop (), GSI_NEW_STMT);
+    }
 
-      tr = gimplify_build2 (gsi, code, inner_type, tr, div);
-      ti = gimplify_build2 (gsi, code, inner_type, ti, div);
+    complex_div_internal_wide (gsi, &real, &imag, i1, r1, i2, r2, inner_type,
+		code);
+    imag = gimplify_build1 (gsi, NEGATE_EXPR, inner_type, imag);
 
-     if (bb_false)
-       {
-	 stmt = gimple_build_assign (rr, tr);
-	 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
-	 stmt = gimple_build_assign (ri, ti);
-	 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
-	 gsi_remove (gsi, true);
-       }
+    if (bb_false)
+    {
+      stmt = gimple_build_assign (temp_reg_real, real);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      stmt = gimple_build_assign (temp_reg_imag, imag);
+      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+      gsi_remove (gsi, true);
     }
+  }
 
   if (bb_join)
     *gsi = gsi_start_bb (bb_join);
   else
-    rr = tr, ri = ti;
+    temp_reg_real = real, temp_reg_imag = imag;
 
-  update_complex_assignment (gsi, rr, ri);
+  update_complex_assignment (gsi, temp_reg_real, temp_reg_imag);
 }
 
 /* Expand complex division to scalars.  */
