using scratchpads to enhance RTL-level if-conversion: revised patch

Abe abe_skolnik@yahoo.com
Tue Oct 27 23:02:00 GMT 2015


Dear all,

Thanks for all your feedback.  I have integrated as much of it as I could in the available time.

The revised patch now allows the target to decide when to use scratchpads,
the global default being to do so when the profile data indicates it is probably good to do so.

Bootstrapped and make-checked on x86_64 GNU/Linux.

Regards,

Abe








2015-10-06  Abe  <abe_skolnik@yahoo.com>

         * ifcvt.c (noce_mem_write_may_go_wrong_even_with_scratchpads): New.
         (noce_mem_write_may_trap_or_fault_p_1): New.
         (noce_mem_write_may_trap_or_fault_p): Moved most of it into
         "noce_mem_write_may_trap_or_fault_p_1".
         (noce_process_if_block): Added scratchpad-based support for if-converting
         half hammocks with writes to destinations GCC says may "trap or fault".
         (if_convert): Ensure the scratchpad-support vector is cleared upon
         each entry into the RTL if-converter.
         * target.def (RTL_ifcvt_when_to_use_scratchpads): New DEFHOOKPOD.
         * target.h: New enum named "RTL_ifcvt_when_to_use_scratchpads".






 From c24c9b7528a2865efe49e5abb812bfbbac1a5e1c Mon Sep 17 00:00:00 2001
From: Abe <abe_skolnik@yahoo.com>
Date: Tue, 27 Oct 2015 17:12:44 -0500
Subject: [PATCH] Adding my RTL ifcvt work on top of Ville`s upstream commit.

---
  gcc/ifcvt.c    | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
  gcc/target.def |   5 ++
  gcc/target.h   |   6 ++
  3 files changed, 160 insertions(+), 20 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 7ab738e..fb80ee9 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -56,6 +56,8 @@
  #include "rtl-iter.h"
  #include "ifcvt.h"

+#include <utility>
+
  #ifndef MAX_CONDITIONAL_EXECUTE
  #define MAX_CONDITIONAL_EXECUTE \
    (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +68,9 @@

  #define NULL_BLOCK	((basic_block) NULL)

+/* An arbitrary inclusive maximum size (in bytes) for each scratchpad.  */
+#define SCRATCHPAD_MAX_SIZE 128
+
  /* True if after combine pass.  */
  static bool ifcvt_after_combine;

@@ -110,6 +115,8 @@ static int dead_or_predicable (basic_block, basic_block, basic_block,
  			       edge, int);
  static void noce_emit_move_insn (rtx, rtx);
  static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
  

  /* Count the number of non-jump active insns in BB.  */

@@ -2828,19 +2835,16 @@ noce_operand_ok (const_rtx op)
    return ! may_trap_p (op);
  }

-/* Return true if a write into MEM may trap or fault.  */
+/* Return true if a write into MEM may trap or fault
+   even in the presence of scratchpad support.  */

  static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)
  {
-  rtx addr;
-
    if (MEM_READONLY_P (mem))
      return true;

-  if (may_trap_or_fault_p (mem))
-    return true;
-
+  rtx addr;
    addr = XEXP (mem, 0);

    /* Call target hook to avoid the effects of -fpic etc....  */
@@ -2881,6 +2885,18 @@ noce_mem_write_may_trap_or_fault_p (const_rtx mem)
    return false;
  }

+/* Return true if a write into MEM may trap or fault
+   without scratchpad support.  */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  if (may_trap_or_fault_p (mem))
+    return true;
+
+  return noce_mem_write_may_trap_or_fault_p_1 (mem);
+}
+
  /* Return whether we can use store speculation for MEM.  TOP_BB is the
     basic block above the conditional block where we are considering
     doing the speculative store.  We look for whether MEM is set
@@ -3031,7 +3047,7 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
     at converting the block.  */

  static int
-noce_process_if_block (struct noce_if_info *if_info)
+noce_process_if_block (struct noce_if_info *if_info, edge then_edge)
  {
    basic_block test_bb = if_info->test_bb;	/* test block */
    basic_block then_bb = if_info->then_bb;	/* THEN */
@@ -3047,7 +3063,7 @@ noce_process_if_block (struct noce_if_info *if_info)

       (1) if (...) x = a; else x = b;
       (2) x = b; if (...) x = a;
-     (3) if (...) x = a;   // as if with an initial x = x.
+     (3) if (...) x = a;

       The later patterns require jumps to be more expensive.
       For the if (...) x = a; else x = b; case we allow multiple insns
@@ -3200,17 +3216,127 @@ noce_process_if_block (struct noce_if_info *if_info)

    if (!set_b && MEM_P (orig_x))
      {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only.  If
-	 x is a read-only memory, then the program is valid only if we
-	 avoid the store into it.  If there are stores on both the
-	 THEN and ELSE arms, then we can go ahead with the conversion;
-	 either the program is broken, or the condition is always
-	 false such that the other memory is selected.  */
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we avoid
+	 the store into it.  If there are stores on both the THEN and ELSE arms,
+	 then we can go ahead with the conversion; either the program is broken,
+	 or the condition is always false such that the other memory is selected.
+	 The non-scratchpad-based conversion here has an implicit "else x = x;".  */
        if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  if (reload_completed
+	      || optimize < 2
+	      || optimize_function_for_size_p (cfun)
+	      || targetm.have_conditional_execution ()
+	      || !HAVE_conditional_move
+	      || (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1)))
+	      || RTL_ifcvt_never_use_scratchpads
+	           == targetm.RTL_ifcvt_when_to_use_scratchpads)
+	    return FALSE;
+
+
+	  if (RTL_ifcvt_use_spads_as_per_profile
+		== targetm.RTL_ifcvt_when_to_use_scratchpads
+	      && (PROFILE_ABSENT == profile_status_for_fn (cfun)
+		  || PROFILE_GUESSED == profile_status_for_fn (cfun)
+		  || predictable_edge_p (then_edge)
+		  || ! maybe_hot_bb_p (cfun, then_bb)))
+	    return FALSE;
+
+
+	  if (noce_mem_write_may_trap_or_fault_p_1 (orig_x)
+	      || !MEM_SIZE_KNOWN_P (orig_x))
+	    return FALSE;
+
+	  const size_t MEM_size = MEM_SIZE (orig_x);
+	  if (MEM_size > SCRATCHPAD_MAX_SIZE)
+	    return FALSE;
+
+	  rtx biggest_spad = 0;
+	  unsigned int biggest_spad_size = 0;
+	  if (size_t vec_len = scratchpads.length ())
+	    {
+	      std::pair<rtx, unsigned> tmp_pair = scratchpads[vec_len - 1];
+	      biggest_spad = tmp_pair.first;
+	      biggest_spad_size = tmp_pair.second;
+	    }
+	  if (MEM_size > biggest_spad_size)
+	    {
+	      biggest_spad_size = MEM_size;
+	      biggest_spad = assign_stack_local (GET_MODE (orig_x), MEM_size, 0);
+	      gcc_assert (biggest_spad);
+	      scratchpads.safe_push (std::make_pair (biggest_spad, MEM_size));
+	    }
+
+	  gcc_assert (biggest_spad);
+
+	  rtx reg_for_store_addr = gen_reg_rtx (Pmode);
+	  set_used_flags (reg_for_store_addr);
+
+	  start_sequence ();
+
+	  for (rtx_insn *insn = BB_HEAD (then_bb);
+	       insn && insn != insn_a && insn != BB_END (then_bb);
+	       insn = NEXT_INSN (insn))
+	    if (!(NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+	      duplicate_insn_chain (insn, insn);
+
+	  rtx target = noce_emit_cmove (if_info,
+					reg_for_store_addr,
+					GET_CODE (cond),
+					XEXP (cond, 0),
+					XEXP (cond, 1),
+					XEXP (orig_x, 0),
+					XEXP (biggest_spad, 0));
+
+	  if (!target)
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+	  if (target != reg_for_store_addr)
+	    noce_emit_move_insn (reg_for_store_addr, target);
+
+	  rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr);
+	  MEM_NOTRAP_P (mem) = MEM_NOTRAP_P (orig_x);
+	  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x);
+
+	  alias_set_type temp_alias_set = new_alias_set ();
+	  if (MEM_ALIAS_SET (orig_x))
+	    record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set);
+	  set_mem_alias_set (mem, temp_alias_set);
+
+	  set_mem_align (mem, MIN (MEM_ALIGN (biggest_spad),
+				   MEM_ALIGN (orig_x)));
+	  if (MEM_ADDR_SPACE (orig_x) != MEM_ADDR_SPACE (biggest_spad))
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+
+	  set_used_flags (mem);
+
+	  noce_emit_move_insn (mem, a);
+
+	  rtx_insn *seq = end_ifcvt_sequence (if_info);
+	  if (!seq)
+	    return FALSE;
+
+	  unshare_all_rtl_in_chain (seq);
+
+	  /* Prevent the code right after "success:"
+	     from throwing away the changes.  */
+	  x = orig_x;
+
+	  emit_insn_before_setloc (seq, if_info->jump,
+				   INSN_LOCATION (if_info->insn_a));
+	  goto success;
+
+	}

        /* Avoid store speculation: given "if (...) x = a" where x is a
  	 MEM, we only want to do the store if x is always set
@@ -3696,7 +3822,7 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,

    /* Do the real work.  */

-  if (noce_process_if_block (&if_info))
+  if (noce_process_if_block (&if_info, then_edge))
      return TRUE;

    if (HAVE_conditional_move
@@ -5003,6 +5129,9 @@ if_convert (bool after_combine)
    basic_block bb;
    int pass;

+  /* Ensure that we start the scratchpads data fresh each time.  */
+  scratchpads.truncate (0);
+
    if (optimize == 1)
      {
        df_live_add_problem ();
diff --git a/gcc/target.def b/gcc/target.def
index d29aad5..dc007e5 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5915,6 +5915,11 @@ HOOK_VECTOR_END (mode_switching)
  #include "target-insns.def"
  #undef DEF_TARGET_INSN

+DEFHOOKPOD
+(RTL_ifcvt_when_to_use_scratchpads,
+"*",
+enum RTL_ifcvt_when_to_use_scratchpads, RTL_ifcvt_use_spads_as_per_profile)
+
  /* Close the 'struct gcc_target' definition.  */
  HOOK_VECTOR_END (C90_EMPTY_HACK)

diff --git a/gcc/target.h b/gcc/target.h
index a79f424..bbe4814 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -187,6 +187,12 @@ enum vect_cost_model_location {
  #define DEFHOOK_UNDOC DEFHOOK
  #define HOOKSTRUCT(FRAGMENT) FRAGMENT

+enum RTL_ifcvt_when_to_use_scratchpads {
+  RTL_ifcvt_never_use_scratchpads = 0,
+  RTL_ifcvt_always_use_scratchpads,
+  RTL_ifcvt_use_spads_as_per_profile
+};
+
  #include "target.def"

  extern struct gcc_target targetm;
-- 
2.1.0.243.g30d45f7

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Adding-my-RTL-ifcvt-work-on-top-of-Ville-s-upstream-.patch.gz
Type: application/x-gzip
Size: 3631 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20151027/3d7ea6af/attachment.bin>


More information about the Gcc-patches mailing list