This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

using scratchpads to enhance RTL-level if-conversion: the new patch now passes bootstrap with the default BUILD_CONFIG [i.e. no stage2-to-stage3 comparison errors even with debugging info off in stage2 and on in stage3] AND passes "make check" testing with no new regressions


Dear all,

My new patch now passes bootstrap with the default BUILD_CONFIG [i.e. no stage2-to-stage3
comparison errors even with debugging info off in stage2 and on in stage3]
_and_ passes "make check" testing with no new regressions.
As before, this testing was done on AMD64-AKA-"x86_64" GNU/Linux [Ubuntu 14.04.3 LTS].
I tested my patch on top of the following commit, which the below is now relative to:

  commit d36ce389eff2bce98ffe3384b969b4b5fc20c716
  Author: ville <ville@138bc75d-0d04-0410-961f-82ee72b054a4>
  Date:   Thu Oct 1 19:22:08 2015 +0000


As mentioned earlier, the current scratchpad allocation algorithm is
neither perfect nor necessarily final.  I have already discussed
this locally with my colleagues and found several alternatives.
I expect I will discuss this with the community.

My thanks to everybody who helped me to fix the recent bugs, and my thanks
to Kyrill for his RTL if-conversion work, upon which this work is based.

I look forward to your feedback.

Regards,

Abe










---------------------------------










 gcc/ChangeLog |  14 ++++
 gcc/ifcvt.c   | 253 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 256 insertions(+), 11 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 91b417a..89e79e1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+ChangeLog:
+
+2015-10-02  Abe Skolnik  <a.skolnik@samsung.com>
+
+	* ifcvt.c
+	(if_convert): Ensure the scratchpad-support variables are cleared upon
+	  each entry into the RTL if-converter.
+	(noce_mem_write_may_trap_or_fault_considering_scratchpad_support_p): New.
+	(noce_mem_write_may_trap_or_fault_p): Moved most of the functionality
+	  to "noce_mem_write_may_trap_or_fault_considering_scratchpad_support_p",
+	  rewrote this one as a wrapper that preserves the old interface.
+	(noce_process_if_block):  Added support for scratchpad-based writes to
+	  nonlocal/nonscalar variables inside half hammocks.
+
 2015-10-01  Sebastian Pop  <s.pop@samsung.com>
 	    Aditya Kumar  <aditya.k7@samsung.com>

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 7ab738e..f52bcfb 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -47,6 +47,7 @@
 #include "insn-codes.h"
 #include "optabs.h"
 #include "diagnostic-core.h"
+#include "diagnostic-color.h"
 #include "tm_p.h"
 #include "cfgloop.h"
 #include "target.h"
@@ -56,6 +57,8 @@
 #include "rtl-iter.h"
 #include "ifcvt.h"

+#include <utility>
+
 #ifndef MAX_CONDITIONAL_EXECUTE
 #define MAX_CONDITIONAL_EXECUTE \
   (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +69,9 @@

 #define NULL_BLOCK	((basic_block) NULL)

+/* An arbitrary upper bound; it should never be *nearly* this big.  */
+#define SCRATCHPADS_INCLUSIVE_MAX_SIZE_IN_BYTES 128
+
 /* True if after combine pass.  */
 static bool ifcvt_after_combine;

@@ -110,6 +116,10 @@ static int dead_or_predicable (basic_block, basic_block, basic_block,
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
+static rtx biggest_scratchpad_rtx;
+static size_t size_of_biggest_scratchpad;
 
 /* Count the number of non-jump active insns in BB.  */

@@ -2828,18 +2838,28 @@ noce_operand_ok (const_rtx op)
   return ! may_trap_p (op);
 }

-/* Return true if a write into MEM may trap or fault.  */
+/* Return true if a write into MEM may trap or fault.
+   Pass in "true" to SCRATCHPADS_ENABLED
+   if/when/where asking with regard to a
+   scratchpad-based if-conversion, "false" otherwise;
+   "noce_mem_write_may_trap_or_fault_p", below,
+   preserves the pre-scratchpad-enhancements behavior
+   of the older function with the same name,
+   which used to contain most of what is now in
+   "noce_mem_write_may_trap_or_fault_considering_scratchpad_support_p" */

 static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_considering_scratchpad_support_p
+  (const_rtx mem, bool scratchpads_enabled)
 {
   rtx addr;

   if (MEM_READONLY_P (mem))
     return true;

-  if (may_trap_or_fault_p (mem))
-    return true;
+  if (! scratchpads_enabled)
+    if (may_trap_or_fault_p (mem))
+      return true;

   addr = XEXP (mem, 0);

@@ -2881,6 +2901,18 @@ noce_mem_write_may_trap_or_fault_p (const_rtx mem)
   return false;
 }

+ /* The following function preserves the pre-scratchpad-enhancements
+    behavior of the older function with the same name,
+    which used to contain most of what is now in
+    "noce_mem_write_may_trap_or_fault_considering_scratchpad_support_p".  */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  return noce_mem_write_may_trap_or_fault_considering_scratchpad_support_p
+    (mem, false);
+}
+
 /* Return whether we can use store speculation for MEM.  TOP_BB is the
    basic block above the conditional block where we are considering
    doing the speculative store.  We look for whether MEM is set
@@ -3150,6 +3182,7 @@ noce_process_if_block (struct noce_if_info *if_info)
     }

   /* Don't operate on sources that may trap or are volatile.  */
+  /* This will probably need to be altered for scratchpad-based loads.  */
   if (! noce_operand_ok (a) || ! noce_operand_ok (b))
     return FALSE;

@@ -3200,17 +3233,209 @@ noce_process_if_block (struct noce_if_info *if_info)

   if (!set_b && MEM_P (orig_x))
     {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only.  If
-	 x is a read-only memory, then the program is valid only if we
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we
 	 avoid the store into it.  If there are stores on both the
 	 THEN and ELSE arms, then we can go ahead with the conversion;
 	 either the program is broken, or the condition is always
-	 false such that the other memory is selected.  */
+	 false such that the other memory is selected.  The non-scratchpad-based
+	 conversion here has an implicit "else x = x;". */
       if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  /* The next "if": quoting "noce_emit_cmove":
+	       If we can't create new pseudos, though, don't bother.  */
+	  if (reload_completed)
+	  {
+	    return FALSE;
+	  }
+
+	  if (optimize<2)
+	  {
+	    return FALSE;
+	  }
+
+	  if (optimize_function_for_size_p (cfun))
+	  { /* reminder: "cfun" is a global,
+	       decl.d as "extern" in "function.h" and def.d in "function.c" */
+	    return FALSE;
+	  }
+
+	  if (targetm.have_conditional_execution () || ! HAVE_conditional_move)
+	    {
+	      return FALSE;
+	    }
+
+	  const bool not_a_scratchpad_candidate =
+	    noce_mem_write_may_trap_or_fault_considering_scratchpad_support_p
+	    (orig_x, true);
+
+	  if (! not_a_scratchpad_candidate)
+	  {
+
+	    if (MEM_SIZE_KNOWN_P (orig_x))
+	    {
+	      const size_t size_of_MEM_operand = MEM_SIZE (orig_x);
+
+	      if (size_of_MEM_operand<=SCRATCHPADS_INCLUSIVE_MAX_SIZE_IN_BYTES)
+	      {
+
+		if (size_of_MEM_operand > size_of_biggest_scratchpad)
+		{
+		  size_of_biggest_scratchpad = size_of_MEM_operand;
+		  biggest_scratchpad_rtx = assign_stack_local
+		    (GET_MODE (orig_x), size_of_MEM_operand, 0);
+		    /* 0: align acc. to the machine mode indicated by
+			  "GET_MODE (orig_x)" */
+		  gcc_assert (biggest_scratchpad_rtx);
+		  scratchpads.safe_push (std::make_pair (biggest_scratchpad_rtx,
+							 size_of_MEM_operand));
+		}
+
+		gcc_assert (biggest_scratchpad_rtx);
+
+		rtx reg_for_address_to_which_to_store = gen_reg_rtx (Pmode);
+		set_used_flags (reg_for_address_to_which_to_store);
+
+		start_sequence ();
+
+		/* We must copy the insn.s between {the start of the THEN block}
+		   and {the set of 'a'}, if they exist, since they may be needed
+		   for the converted code as well, but we must not copy a
+		   start-of-BB note if one is present, nor debug "insn"s.  */
+		rtx_insn* insn_to_maybe_duplicate;
+		/* Only removing a NOTE_INSN_BASIC_BLOCK when it`s the first
+		   insn in the BB caused trouble with bootstrapping using the
+		   default value of "bootstrap-debug" for "BUILD_CONFIG",
+		   since the object files produced by stage2 & stage3 differed
+		   from each other even _after_ stripping off debug info;
+		   Abe`s conclusion: the debug info is sometimes the 1st "insn"
+		   in the BB, with the NOTE_INSN_BASIC_BLOCK following it!  */
+
+		for
+		  (insn_to_maybe_duplicate = BB_HEAD (then_bb);
+		   insn_to_maybe_duplicate &&
+		     (insn_to_maybe_duplicate != insn_a) &&
+		     (insn_to_maybe_duplicate != BB_END (then_bb));
+		   insn_to_maybe_duplicate=NEXT_INSN (insn_to_maybe_duplicate))
+		  {
+		     if (! (NOTE_INSN_BASIC_BLOCK_P (insn_to_maybe_duplicate))
+		            || (DEBUG_INSN_P (insn_to_maybe_duplicate)))
+		       {
+			 duplicate_insn_chain
+			   (insn_to_maybe_duplicate, insn_to_maybe_duplicate);
+			 /* A return of 0 from "duplicate_insn_chain" is
+			    _not_ a failure, as far as Abe knows;
+			    it just seems to return the
+			    "NEXT_INSN" of the last insn it duplicated.  */
+		       }
+		  }
+
+		/* Done copying the insn.s between
+		     {the start of the THEN block}
+		   and
+		     {the set of 'a'},
+		   if any.  */
+
+		if (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1)))
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+
+		/* WARNING: "noce_emit_cmove" takes its resulting-expr. param.s
+			    in {false, true} order, whereas
+			    "gen_rtx_IF_THEN_ELSE" takes them the other way.  */
+
+		/* IMPORTANT NOTE: _intentionally_ not reversing the positions
+		   of "XEXP (orig_x,0)" and "XEXP (biggest_scratchpad_rtx, 0)"
+		   when "if_info->then_else_reversed" is true:
+		   when that flag is true, code that has already run
+		   by this point has _already_ inverted "if_info->cond",
+		   which is the source for the local "cond".  */
+
+		rtx target = noce_emit_cmove (if_info,
+					      reg_for_address_to_which_to_store,
+					      GET_CODE (cond),
+					      XEXP (cond, 0),
+					      XEXP (cond, 1),
+					      XEXP (orig_x, 0),
+					      XEXP (biggest_scratchpad_rtx, 0));
+
+		if (!target)
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+		if (target != reg_for_address_to_which_to_store)
+		  noce_emit_move_insn
+		    (reg_for_address_to_which_to_store, target);
+
+		/* --- start of building the new MEM --- */
+		/* Most or all of the code for building the new MEM is Abe
+		   mimicking the MEM-related code in "noce_try_cmove_arith".  */
+		rtx mem_rtx = gen_rtx_MEM (GET_MODE (orig_x),
+		  reg_for_address_to_which_to_store);
+		MEM_NOTRAP_P (mem_rtx) = true;
+		MEM_VOLATILE_P (mem_rtx) = MEM_VOLATILE_P (orig_x);
+
+		alias_set_type temp_alias_set = new_alias_set ();
+		if (MEM_ALIAS_SET (orig_x))
+		  record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set);
+		set_mem_alias_set (mem_rtx, temp_alias_set);
+
+
+		set_mem_align (mem_rtx,
+		  MIN (MEM_ALIGN (biggest_scratchpad_rtx), MEM_ALIGN (orig_x)));
+
+		if (MEM_ADDR_SPACE (orig_x) !=
+		    MEM_ADDR_SPACE (biggest_scratchpad_rtx))
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+
+		set_used_flags (mem_rtx);
+
+		/* --- end of building the new MEM --- */
+
+		noce_emit_move_insn (mem_rtx, a);
+
+		/* Abe`s note: do we need to do the following after getting
+		   a new pseudo-reg., as shown elsewhere in this file?
+		  if (max_regno < max_reg_num ())  max_regno = max_reg_num ();
+		*/
+
+		rtx_insn *seq = end_ifcvt_sequence (if_info);
+		if (!seq)
+		  {
+		    return FALSE;
+		  }
+		else
+		  {
+		    unshare_all_rtl_in_chain (seq);
+
+		    /* Prevent the code right after "success:"
+		       from throwing away the changes.  */
+		    x = orig_x;
+
+		    emit_insn_before_setloc (seq, if_info->jump,
+					     INSN_LOCATION (if_info->insn_a));
+
+		    goto success;
+		  }
+
+	      } /* (size_of_MEM_operand <=
+				     SCRATCHPADS_INCLUSIVE_MAX_SIZE_IN_BYTES) */
+	    }
+
+	  }
+
+	  return FALSE;
+	}

       /* Avoid store speculation: given "if (...) x = a" where x is a
 	 MEM, we only want to do the store if x is always set
@@ -5000,9 +5225,15 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
 static void
 if_convert (bool after_combine)
 {
+
   basic_block bb;
   int pass;

+  /* Ensure that we start the scratchpads vec fresh each time.  */
+  scratchpads.truncate (0);
+  size_of_biggest_scratchpad = 0;
+  biggest_scratchpad_rtx = 0; /* Reminder: an "rtx", therefore a pointer.  */
+
   if (optimize == 1)
     {
       df_live_add_problem ();
--


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]