using scratchpads to enhance RTL-level if-conversion: revised patch

Abe abe_skolnik@yahoo.com
Tue Oct 13 20:05:00 GMT 2015


Attached please find my revised patch to enhance RTL-level if-conversion
using scratchpads, newly optimizing only half-hammock stores for now.

Bootstrapped and regression-tested on x86_64 GNU/Linux.

Regards,

Abe
-------------- next part --------------
--- ifcvt.c	2015-09-01 12:54:38.234108158 -0500
+++ ifcvt.c	2015-10-13 14:22:27.935204461 -0500
@@ -56,6 +56,8 @@
 #include "rtl-iter.h"
 #include "ifcvt.h"
 
+#include <utility>
+
 #ifndef MAX_CONDITIONAL_EXECUTE
 #define MAX_CONDITIONAL_EXECUTE \
   (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +68,9 @@
 
 #define NULL_BLOCK	((basic_block) NULL)
 
+/* An arbitrary inclusive maximum size (in bytes) for each scratchpad.  */
+#define SCRATCHPAD_MAX_SIZE 128
+
 /* True if after combine pass.  */
 static bool ifcvt_after_combine;
 
@@ -110,6 +115,8 @@ static int dead_or_predicable (basic_blo
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
 

 /* Count the number of non-jump active insns in BB.  */
 
@@ -2784,19 +2791,16 @@ noce_operand_ok (const_rtx op)
   return ! may_trap_p (op);
 }
 
-/* Return true if a write into MEM may trap or fault.  */
+/* Return true if a write into MEM may trap or fault
+   even in the presence of scratchpad support.  */
 
 static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)
 {
-  rtx addr;
-
   if (MEM_READONLY_P (mem))
     return true;
 
-  if (may_trap_or_fault_p (mem))
-    return true;
-
+  rtx addr;
   addr = XEXP (mem, 0);
 
   /* Call target hook to avoid the effects of -fpic etc....  */
@@ -2837,6 +2841,18 @@ noce_mem_write_may_trap_or_fault_p (cons
   return false;
 }
 
+/* Return true if a write into MEM may trap or fault
+   without scratchpad support.  */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  if (may_trap_or_fault_p (mem))
+    return true;
+
+  return noce_mem_write_may_trap_or_fault_p_1 (mem);
+}
+
 /* Return whether we can use store speculation for MEM.  TOP_BB is the
    basic block above the conditional block where we are considering
    doing the speculative store.  We look for whether MEM is set
@@ -3156,17 +3172,116 @@ noce_process_if_block (struct noce_if_in
 
   if (!set_b && MEM_P (orig_x))
     {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only.  If
-	 x is a read-only memory, then the program is valid only if we
-	 avoid the store into it.  If there are stores on both the
-	 THEN and ELSE arms, then we can go ahead with the conversion;
-	 either the program is broken, or the condition is always
-	 false such that the other memory is selected.  */
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we avoid
+	 the store into it.  If there are stores on both the THEN and ELSE arms,
+	 then we can go ahead with the conversion; either the program is broken,
+	 or the condition is always false such that the other memory is selected.
+	 The non-scratchpad-based conversion here has an implicit "else x = x;".  */
       if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  if (   reload_completed
+	      || optimize < 2
+	      || optimize_function_for_size_p (cfun)
+	      || targetm.have_conditional_execution ()
+	      || !HAVE_conditional_move
+	      || (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1))))
+	    return FALSE;
+
+
+	  if (noce_mem_write_may_trap_or_fault_p_1 (orig_x)
+	      || !MEM_SIZE_KNOWN_P (orig_x))
+	    return FALSE;
+
+	  const size_t MEM_size = MEM_SIZE (orig_x);
+	  if (MEM_size > SCRATCHPAD_MAX_SIZE)
+	    return FALSE;
+
+	  rtx biggest_spad = 0;
+	  unsigned int biggest_spad_size = 0;
+	  if (size_t vec_len = scratchpads.length ())
+	    {
+	      std::pair<rtx, unsigned> tmp_pair = scratchpads[vec_len - 1];
+	      biggest_spad = tmp_pair.first;
+	      biggest_spad_size = tmp_pair.second;
+	    }
+	  if (MEM_size > biggest_spad_size)
+	    {
+	      biggest_spad_size = MEM_size;
+	      biggest_spad = assign_stack_local (GET_MODE (orig_x), MEM_size, 0);
+	      gcc_assert (biggest_spad);
+	      scratchpads.safe_push (std::make_pair (biggest_spad, MEM_size));
+	    }
+
+	  gcc_assert (biggest_spad);
+
+	  rtx reg_for_store_addr = gen_reg_rtx (Pmode);
+	  set_used_flags (reg_for_store_addr);
+
+	  start_sequence ();
+
+	  for (rtx_insn *insn = BB_HEAD (then_bb);
+	       insn && insn != insn_a && insn != BB_END (then_bb);
+	       insn = NEXT_INSN (insn))
+	    if (!(NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+	      duplicate_insn_chain (insn, insn);
+
+	  rtx target = noce_emit_cmove (if_info,
+					reg_for_store_addr,
+					GET_CODE (cond),
+					XEXP (cond, 0),
+					XEXP (cond, 1),
+					XEXP (orig_x, 0),
+					XEXP (biggest_spad, 0));
+
+	  if (!target)
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+	  if (target != reg_for_store_addr)
+	    noce_emit_move_insn (reg_for_store_addr, target);
+
+	  rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr);
+	  MEM_NOTRAP_P (mem) = true;
+	  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x);
+
+	  alias_set_type temp_alias_set = new_alias_set ();
+	  if (MEM_ALIAS_SET (orig_x))
+	    record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set);
+	  set_mem_alias_set (mem, temp_alias_set);
+
+	  set_mem_align (mem, MIN (MEM_ALIGN (biggest_spad),
+				   MEM_ALIGN (orig_x)));
+	  if (MEM_ADDR_SPACE (orig_x) != MEM_ADDR_SPACE (biggest_spad))
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+
+	  set_used_flags (mem);
+
+	  noce_emit_move_insn (mem, a);
+
+	  rtx_insn *seq = end_ifcvt_sequence (if_info);
+	  if (!seq)
+	    return FALSE;
+
+	  unshare_all_rtl_in_chain (seq);
+
+	  /* Prevent the code right after "success:"
+	     from throwing away the changes.  */
+	  x = orig_x;
+
+	  emit_insn_before_setloc (seq, if_info->jump,
+				   INSN_LOCATION (if_info->insn_a));
+	  goto success;
+
+	}
 
       /* Avoid store speculation: given "if (...) x = a" where x is a
 	 MEM, we only want to do the store if x is always set
@@ -4959,6 +5074,9 @@ if_convert (bool after_combine)
   basic_block bb;
   int pass;
 
+  /* Ensure that we start the scratchpads data fresh each time.  */
+  scratchpads.truncate (0);
+
   if (optimize == 1)
     {
       df_live_add_problem ();


More information about the Gcc-patches mailing list