[PATCH][PR90106] Builtin call transformation changes in cdce pass

JunMa JunMa@linux.alibaba.com
Wed May 8 10:09:00 GMT 2019


Hi

As described in PR90106 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90106),
when GCC meets a builtin function call like:

   y = sqrt (x);

the cdce pass tries to transform the call into an internal function
call, and conditionally executes the original call guarded by a simple
range check on the arguments, which detects most cases where errno does
not need to be set. The result looks like:

   y = IFN_SQRT (x);
   if (__builtin_isless (x, 0))
     sqrt (x);

However, if the call is in tail position, for example:

   y =  sqrt (x);
   return y;

it will become:

   y = IFN_SQRT (x);
   if (__builtin_isless (x, 0))
     sqrt (x);
   return y;

This transformation breaks the tail-call pattern and prevents
later tail-call optimizations.

So this patch transforms a builtin call with a return value into
an if-then-else, which looks like:

   y =  sqrt (x);
    ==>
   if (__builtin_isless (x, 0))
     y = sqrt (x);
   else
     y = IFN_SQRT (x);

BTW, y = sqrt (x) could also be transformed like:

   y = IFN_SQRT (x);
   if (__builtin_isless (x, 0))
     y = sqrt (x);

We don't choose this pattern because it emits worse assembly
code (more move instructions and more registers used) on x86_64.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

Regards
JunMa


gcc/ChangeLog

2019-05-07  Jun Ma <JunMa@linux.alibaba.com>

     PR tree-optimization/90106
     * tree-call-cdce.c (shrink_wrap_one_built_in_call_with_conds): Add
     new parameter for the new internal function call, and move that
     call to a new basic block.
     (use_internal_fn): Pass internal function call to
     shrink_wrap_one_built_in_call_with_conds.

gcc/testsuite/ChangeLog

2019-05-07  Jun Ma <JunMa@linux.alibaba.com>

     PR tree-optimization/90106
     * gcc.dg/cdce1.c: Check tailcall code generation after cdce pass.
     * gcc.dg/cdce2.c: Likewise.

-------------- next part --------------
---
 gcc/testsuite/gcc.dg/cdce1.c |  3 +-
 gcc/testsuite/gcc.dg/cdce2.c |  3 +-
 gcc/tree-call-cdce.c         | 90 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/cdce1.c b/gcc/testsuite/gcc.dg/cdce1.c
index b23ad63..424d80f 100644
--- a/gcc/testsuite/gcc.dg/cdce1.c
+++ b/gcc/testsuite/gcc.dg/cdce1.c
@@ -1,7 +1,8 @@
 /* { dg-do  run  } */
 /* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details  -lm" } */
 /* { dg-require-effective-target int32plus } */
-/* { dg-final { scan-tree-dump  "cdce1.c:16: .* function call is shrink-wrapped into error conditions\."  "cdce" } } */
+/* { dg-final { scan-tree-dump  "cdce1.c:17: .* function call is shrink-wrapped into error conditions\."  "cdce" } } */
+/* { dg-final { scan-assembler     "jmp pow" } } */
 /* { dg-require-effective-target large_double } */
 
 #include <stdlib.h>
diff --git a/gcc/testsuite/gcc.dg/cdce2.c b/gcc/testsuite/gcc.dg/cdce2.c
index 30e7cb1..2af2893 100644
--- a/gcc/testsuite/gcc.dg/cdce2.c
+++ b/gcc/testsuite/gcc.dg/cdce2.c
@@ -1,7 +1,8 @@
 /* { dg-do  run  } */
 /* { dg-skip-if "doubles are floats" { "avr-*-*" } } */
 /* { dg-options "-O2 -fmath-errno -fdump-tree-cdce-details  -lm" } */
-/* { dg-final { scan-tree-dump  "cdce2.c:15: .* function call is shrink-wrapped into error conditions\." "cdce" } } */
+/* { dg-final { scan-tree-dump  "cdce2.c:16: .* function call is shrink-wrapped into error conditions\." "cdce" } } */
+/* { dg-final { scan-assembler "jmp log" } } */
  
 #include <stdlib.h>
 #include <math.h>
diff --git a/gcc/tree-call-cdce.c b/gcc/tree-call-cdce.c
index 2e482b3..9e3372f 100644
--- a/gcc/tree-call-cdce.c
+++ b/gcc/tree-call-cdce.c
@@ -93,10 +93,10 @@ along with GCC; see the file COPYING3.  If not see
 
 	y = sqrt (x);
      ==>
-	y = IFN_SQRT (x);
 	if (__builtin_isless (x, 0))
-	    sqrt (x);
-
+	  y =  sqrt (x);
+	else
+	  y = IFN_SQRT (x);
      In the vast majority of cases we should then never need to call sqrt.
 
    Note that library functions are not supposed to clear errno to zero without
@@ -793,14 +793,16 @@ gen_shrink_wrap_conditions (gcall *bi_call, vec<gimple *> conds,
 }
 
 /* Shrink-wrap BI_CALL so that it is only called when one of the NCONDS
-   conditions in CONDS is false.  */
+   conditions in CONDS is false.  Also move BI_NEWCALL to a new basic block
+   when it is non-null, it is called while all of the CONDS are true.  */
 
 static void
 shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
-					  unsigned int nconds)
+					  unsigned int nconds,
+					  gcall *bi_newcall = NULL)
 {
   gimple_stmt_iterator bi_call_bsi;
-  basic_block bi_call_bb, join_tgt_bb, guard_bb;
+  basic_block bi_call_bb, bi_newcall_bb, join_tgt_bb, guard_bb;
   edge join_tgt_in_edge_from_call, join_tgt_in_edge_fall_thru;
   edge bi_call_in_edge0, guard_bb_in_edge;
   unsigned tn_cond_stmts;
@@ -809,27 +811,26 @@ shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
   gimple *cond_expr_start;
 
   /* The cfg we want to create looks like this:
-
-	   [guard n-1]         <- guard_bb (old block)
-	     |    \
-	     | [guard n-2]                   }
-	     |    / \                        }
-	     |   /  ...                      } new blocks
-	     |  /  [guard 0]                 }
-	     | /    /   |                    }
-	    [ call ]    |     <- bi_call_bb  }
-	     | \        |
-	     |  \       |
-	     |   [ join ]     <- join_tgt_bb (old iff call must end bb)
-	     |
+          [guard n-1]         <- guard_bb (old block)
+            |    \
+            | [guard n-2]                   }
+            |    / \                        }
+            |   /  ...                      } new blocks
+            |  /  [guard 0]                 }
+            | /  /    |                     }
+           [call]     |      <- bi_call_bb  }
+             \    [newcall]  <-bi_newcall_bb}
+              \       |
+                [join]       <- join_tgt_bb (old iff call must end bb)
 	 possible EH edges (only if [join] is old)
 
      When [join] is new, the immediate dominators for these blocks are:
 
      1. [guard n-1]: unchanged
      2. [call]: [guard n-1]
-     3. [guard m]: [guard m+1] for 0 <= m <= n-2
-     4. [join]: [guard n-1]
+     3. [newcall]: [guard 0]
+     4. [guard m]: [guard m+1] for 0 <= m <= n-2
+     5. [join]: [guard n-1]
 
      We punt for the more complex case case of [join] being old and
      simply free the dominance info.  We also punt on postdominators,
@@ -927,6 +928,47 @@ shrink_wrap_one_built_in_call_with_conds (gcall *bi_call, vec <gimple *> conds,
       edges.quick_push (edge_pair (bi_call_in_edge, guard_bb_in_edge));
     }
 
+  /* Move BI_NEWCALL to new basic block when it is non-null.  */
+  if (bi_newcall)
+    {
+      /* Get bi_newcall_bb by split join_tgt_in_edge_fall_thru edge,
+         and move BI_NEWCALL to bi_newcall_bb.  */
+      bi_newcall_bb = split_edge (join_tgt_in_edge_fall_thru);
+      gimple_stmt_iterator to_gsi = gsi_start_bb (bi_newcall_bb);
+      gimple_stmt_iterator from_gsi = gsi_for_stmt (bi_newcall);
+      gsi_move_before (&from_gsi, &to_gsi);
+      join_tgt_in_edge_fall_thru = EDGE_SUCC (bi_newcall_bb, 0);
+      join_tgt_bb = join_tgt_in_edge_fall_thru->dest;
+
+      tree bi_newcall_lhs = gimple_call_lhs (bi_newcall);
+      tree bi_call_lhs = gimple_call_lhs (bi_call);
+      if (!bi_call_lhs)
+        {
+          bi_call_lhs = copy_ssa_name (bi_newcall_lhs);
+          gimple_call_set_lhs (bi_call, bi_call_lhs);
+          SSA_NAME_DEF_STMT (bi_call_lhs) = bi_call;
+        }
+
+      /* Create phi node for lhs of BI_CALL and BI_NEWCALL.  */
+      gphi *new_phi = create_phi_node (copy_ssa_name (bi_newcall_lhs),
+				       join_tgt_bb);
+      SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (new_phi))
+        = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (bi_newcall_lhs);
+      add_phi_arg (new_phi, bi_call_lhs, join_tgt_in_edge_from_call,
+                   gimple_location (bi_call));
+      add_phi_arg (new_phi, bi_newcall_lhs, join_tgt_in_edge_fall_thru,
+                   gimple_location (bi_newcall));
+
+      /* Replace all use of original return value with result of phi node.  */
+      use_operand_p use_p;
+      gimple *use_stmt;
+      imm_use_iterator iterator;
+      FOR_EACH_IMM_USE_STMT (use_stmt, iterator, bi_newcall_lhs)
+        if (use_stmt != new_phi)
+	  FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
+	    SET_USE (use_p, PHI_RESULT (new_phi));
+    }
+
   /* Now update the probability and profile information, processing the
      guards in order of execution.
 
@@ -1030,9 +1072,11 @@ use_internal_fn (gcall *call)
 
   unsigned nconds = 0;
   auto_vec<gimple *, 12> conds;
+  bool is_arg_conds = false;
   if (can_test_argument_range (call))
     {
       gen_shrink_wrap_conditions (call, conds, &nconds);
+      is_arg_conds = true;
       gcc_assert (nconds != 0);
     }
   else
@@ -1082,8 +1126,8 @@ use_internal_fn (gcall *call)
 	  call = new_call;
 	}
     }
-
-  shrink_wrap_one_built_in_call_with_conds (call, conds, nconds);
+  shrink_wrap_one_built_in_call_with_conds (call, conds, nconds,
+					    is_arg_conds ? new_call : NULL);
 }
 
 /* The top level function for conditional dead code shrink
-- 
1.8.3.1



More information about the Gcc-patches mailing list