This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFA: improvement to if-conversion


At -O2 -fomot-frame-pointer, we currently generate quite inefficient code
for sh64-elf for this testcase:

long long
f (long long a, long long b, long long c)
{
  long long d;

  if (a < b)
    {
      d = a < c ? a : c;
      d++;
    }
  else
    {
      d = b < c ? b : c;
      d++;
    }
  return d;
}

_f:
        pt      .L2, tr0
        cmpgt   r4, r2, r1
        add     r2, r63, r6
        cmveq   r1, r4, r6
        ptabs   r18, tr1
        addi    r6, 1, r7
        bge     r2, r3, tr0
        pt      .L4, tr0
        blink   tr0, r63
        .align 5
.L2:
        cmpgt   r4, r3, r1
        cmveq   r1, r4, r3
        addi    r3, 1, r7
.L4:
        add     r7, r63, r2
        blink   tr1, r63

We can get much better code when we recognize that the if and else blocks
are structurally equivalent, and use a conditional move to get the
right input value:

_f:
        cmpgt   r3, r2, r1
        ptabs   r18, tr0
        cmveq   r1, r3, r2
        cmpgt   r4, r2, r1
        cmveq   r1, r4, r2
        addi    r2, 1, r2
        blink   tr0, r63

The appended patch implements this optimization.  Bootstrapped on
i686-pc-linux-gnu.  Regression tested for i686-pc-linux-gnu native and
X sh64-elf.

2004-01-29  J"orn Rennecke <joern.rennecke@superh.com>

Generate 4-insn conditional move sequence for 3-way minimum:
	* ifcvt.c (will_merge_if_block, update_regno_block): New functions.
	(noce_try_complex_cmove, ifconvert_mark_reg_use): Likewise.
	(ifconvert_update_reg_use, struct_equiv): Likewise.
	(regno_block, regno_block_max): New variables.
	(noce_process_if_block): Call noce_try_complex_cmove.
	(cond_exec_process_if_block): Call will_merge_if_block and
	will_merge_if_block.
	(update_regno_block_info, equiv_info): New structs.
	(STRUCT_EQUIV_MAX_LOCAL, STRUCT_EQUIV_MAX_INPUT): New defines.
	(if_convert): For flag_expensive_optimizations, initialize regno_block
	before the conversion loop, and free regno_block at the end.

Index: ifcvt.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/ifcvt.c,v
retrieving revision 1.137
diff -p -u -r1.137 ifcvt.c
--- ifcvt.c	25 Jan 2004 03:52:42 -0000	1.137
+++ ifcvt.c	29 Jan 2004 17:37:25 -0000
@@ -98,6 +98,7 @@ static int noce_operand_ok (rtx);
 static int noce_process_if_block (ce_if_block_t *);
 static int process_if_block (ce_if_block_t *);
 static void merge_if_block (ce_if_block_t *);
+static void will_merge_if_block (ce_if_block_t *);
 static int find_cond_trap (basic_block, edge, edge);
 static basic_block find_if_header (basic_block, int);
 static int block_jumps_and_fallthru_p (basic_block, basic_block);
@@ -571,6 +572,7 @@ cond_exec_process_if_block (ce_if_block_
 	     n_insns, (n_insns == 1) ? " was" : "s were");
 
   /* Merge the blocks!  */
+  will_merge_if_block (ce_info);
   merge_if_block (ce_info);
   cond_exec_changed_p = TRUE;
   return TRUE;
@@ -613,6 +615,20 @@ static rtx noce_get_alt_condition (struc
 static int noce_try_minmax (struct noce_if_info *);
 static int noce_try_abs (struct noce_if_info *);
 
+static void update_regno_block (basic_block, basic_block);
+static int noce_try_complex_cmove (ce_if_block_t *,
+				   struct noce_if_info *if_info,
+				   rtx jump, rtx cond);
+
+/* Allocated and initialized at the start of if_convert, REGNO_BLOCK is
+   an array indexed by register number, with REGNO_BLOCK_MAX being the
+   upper (excluded) bound.  For each register thus covered by the array,
+   first is the first basic block - in linear instruction order - in
+   which a register is mentioned, and last is the last basic block in
+   which it is mentioned.  */
+static struct { basic_block first, last; } *regno_block;
+static int regno_block_max;
+
 /* Helper function for noce_try_store_flag*.  */
 
 static rtx
@@ -1846,7 +1862,9 @@ noce_process_if_block (struct ce_if_bloc
   if (! insn_a
       || insn_a != last_active_insn (then_bb, FALSE)
       || (set_a = single_set (insn_a)) == NULL_RTX)
-    return FALSE;
+    return ((else_bb && regno_block && HAVE_conditional_move)
+	    ? noce_try_complex_cmove (ce_info, &if_info, jump, cond)
+	    : FALSE);
 
   x = SET_DEST (set_a);
   a = SET_SRC (set_a);
@@ -2009,6 +2027,7 @@ noce_process_if_block (struct ce_if_bloc
   return FALSE;
 
  success:
+  will_merge_if_block (ce_info);
   /* The original sets may now be killed.  */
   delete_insn (insn_a);
 
@@ -2076,6 +2095,24 @@ process_if_block (struct ce_if_block * c
   return FALSE;
 }
 
+
+/* The blocks described in CE_INFO will be merged.  Update REGNO_BLOCK
+   accordingly.  */
+static void
+will_merge_if_block (struct ce_if_block * ce_info)
+{
+  basic_block test_bb = ce_info->test_bb;	/* test block */
+
+  if (ce_info->then_bb && regno_block)
+    update_regno_block (ce_info->then_bb, test_bb);
+  if (ce_info->else_bb && regno_block)
+    update_regno_block (ce_info->else_bb, test_bb);
+  if (ce_info->join_bb && regno_block)
+    update_regno_block (ce_info->join_bb, test_bb);
+  if (flag_expensive_optimizations)
+    cond_exec_changed_p = TRUE;
+}
+
 /* Merge the blocks and mark for local life update.  */
 
 static void
@@ -3193,6 +3230,293 @@ dead_or_predicable (basic_block test_bb,
   cancel_changes (0);
   return FALSE;
 }
+
+/* Called via for_each_rtx by if_convert, in a linear forward scan over all
+   instructions, if *px is a REG, this updates the corresponding entry in
+   REGNO_BLOCK accordingly.  PBLOCK is the current basic block.  */
+static int
+ifconvert_mark_reg_use (rtx *px, void *pblock)
+{
+  rtx x = *px;
+  int regno;
+
+  if (!x || GET_CODE (x) != REG)
+    return 0;
+  regno = REGNO (x);
+  if (!regno_block[regno].first)
+    regno_block[regno].first = pblock;
+  regno_block[regno].last = pblock;
+  return 0;
+}
+
+/* Used to describe that FROM is going to be merged into TO.  */
+struct update_regno_block_info { basic_block from, to; };
+
+/* Called via for_each_rtx by update_regno_block, if *px is a REG,
+   update its REGNO_BLOCK entry according to the information in PINFO,
+   which is a struct update_regno_block_info *.  */
+static int
+ifconvert_update_reg_use (rtx *px, void *pinfo)
+{
+  rtx x = *px;
+  struct update_regno_block_info *pi = pinfo;
+  
+  int regno;
+
+  if (!x || GET_CODE (x) != REG)
+    return 0;
+  regno = REGNO (x);
+  if (regno >= regno_block_max)
+    return 0;
+  if (regno_block[regno].first == pi->from)
+    regno_block[regno].first = pi->to;
+  if (regno_block[regno].last == pi->from)
+    regno_block[regno].last = pi->to;
+  return 0;
+}
+
+/* FROM_BB is going to be merged into TO_BB.  Update REGNO_BLOCK
+   accordingly.  */
+static void
+update_regno_block (basic_block from_bb, basic_block to_bb)
+{
+  rtx insn;
+  struct update_regno_block_info info;
+
+  info.from = from_bb;
+  info.to = to_bb;
+  for (insn = BB_HEAD (from_bb); ; insn = NEXT_INSN (insn))
+    {
+      if (INSN_P (insn))
+	{
+	  for_each_rtx (&PATTERN (insn), &ifconvert_update_reg_use, &info);
+	  for_each_rtx (&REG_NOTES (insn), &ifconvert_update_reg_use, &info);
+	}
+      if (insn == BB_END (from_bb))
+	break;
+    }
+}
+
+/* Constants used to size arrays in struct equiv_info.  When these limits
+   are exceeded, struct_equiv returns zero.  */
+#define STRUCT_EQUIV_MAX_LOCAL 16
+/* The maximum number of references to an input register that struct_equiv
+   can handle.  */
+#define STRUCT_EQUIV_MAX_INPUT 9
+
+/* A struct equiv_info is used to pass information to struct_equiv and
+   to gather state while two basic blocks are checked for structural
+   equivalence.
+   X_BLOCK and Y_BLOCK are the two block being compared.
+   X_INPUT and Y_INPUT are used by struct_equiv to record a register that
+   is used as an input parameter, i.e. where different registers are used
+   as sources.
+   INPUT gathers pointers to all places where the input register is mentioned
+   in X_BLOCK, with INPUT_COUNT being the index for the next free entry.
+   X_LOCAL and Y_LOCAL are used to gather register numbers of register pairs
+   that are local to X_BLOCK and Y_BLOCK, with LOCAL_COUNT being the index
+   to the next free entry.  */
+struct equiv_info
+{
+  basic_block x_block, y_block;
+  rtx x_input, y_input;
+  int input_count, local_count;
+  rtx *input[STRUCT_EQUIV_MAX_INPUT];
+  int x_local[STRUCT_EQUIV_MAX_LOCAL], y_local[STRUCT_EQUIV_MAX_LOCAL];
+};
+
+/* Check if *xp is equivalent to Y.  RVALUE indicates if the processed
+   piece of rtl is used as a destination, in which case we can't have
+   different registers being an input.  Returns nonzero if the two blocks
+   have been identified as equivalent, zero otherwise.  */
+static int
+struct_equiv (rtx *xp, rtx y, int rvalue, struct equiv_info *info)
+{
+  rtx x = *xp;
+  enum rtx_code code;
+  int length;
+  const char *format;
+  int i;
+
+  if (!y || !x)
+    return x == y;
+  code = GET_CODE (y);
+  if (GET_CODE (x) != code
+      || GET_MODE (x) != GET_MODE (y))
+    return 0;
+  /* ??? could extend to allow CONST_INT inputs.  */
+  if (code == REG)
+    {
+      int x_regno = REGNO (x);
+      int y_regno = REGNO (y);
+      int x_local, y_local;
+
+      if (x_regno == y_regno)
+	return 1;
+      x_local = (x_regno >= regno_block_max
+		 || (regno_block[x_regno].first == info->x_block
+		     && regno_block[x_regno].last == info->x_block));
+      y_local = (y_regno >= regno_block_max
+		 || (regno_block[y_regno].first == info->y_block
+		     && regno_block[y_regno].last == info->y_block));
+      if (x_local && y_local)
+	{
+	  for (i = info->local_count - 1; i >= 0; i--)
+	    {
+	      if (x_regno == info->x_local[i] && y_regno == info->y_local[i])
+		return 1;
+	      if (x_regno == info->x_local[i] || y_regno == info->y_local[i])
+		return 0;
+	    }
+	  if (info->local_count >= STRUCT_EQUIV_MAX_LOCAL)
+	    return 0;
+	  info->x_local[info->local_count] = x_regno;
+	  info->y_local[info->local_count] = y_regno;
+	  info->local_count++;
+	  return 1;
+	}
+      if (x_local || y_local || !rvalue
+	  || info->input_count >= STRUCT_EQUIV_MAX_INPUT)
+	return 0;
+      if (info->x_input == NULL_RTX && info->y_input == NULL_RTX)
+	{
+	  info->x_input = x;
+	  info->y_input = y;
+	}
+      else if (! rtx_equal_p (x, info->x_input)
+	       || ! rtx_equal_p (y, info->y_input))
+	return 0;
+      info->input[info->input_count++] = xp;
+      return 1;
+    }
+  else if (code == SET)
+    return  (struct_equiv (&SET_SRC (x), SET_SRC (y), 1, info)
+	     && struct_equiv (&SET_DEST (x), SET_DEST (y), 0, info));
+  else if (code == MEM)
+    return  struct_equiv (&XEXP (x, 0), XEXP (y, 0), 1, info);
+
+  /* Process subexpressions - this is similar to rtx_equal_p.  */
+  length = GET_RTX_LENGTH (code);
+  format = GET_RTX_FORMAT (code);
+
+  for (i = 0; i < length; ++i)
+    {
+      switch (format[i])
+        {
+	case 'w':
+	  if (XWINT (x, i) != XWINT (y, i))
+	    return 0;
+	  break;
+	case 'n':
+	case 'i':
+	  if (XINT (x, i) != XINT (y, i))
+	    return 0;
+	  break;
+        case 'V':
+        case 'E':
+	  if (XVECLEN (x, i) != XVECLEN (y, i))
+	    return 0;
+          if (XVEC (x, i) != 0)
+            {
+              int j;
+              for (j = 0; j < XVECLEN (x, i); ++j)
+                {
+                  if (! struct_equiv (&XVECEXP (x, i, j), XVECEXP (y, i, j),
+				      rvalue, info))
+		    return 0;
+                }
+            }
+          break;
+        case 'e':
+          if (! struct_equiv (&XEXP (x, i), XEXP (y, i), rvalue, info))
+	    return 0;
+          break;
+	case 'S':
+	case 's':
+	  if ((XSTR (x, i) || XSTR (y, i))
+	      && (! XSTR (x, i) || ! XSTR (y, i)
+		  || strcmp (XSTR (x, i), XSTR (y, i))))
+	    return 0;
+	  break;
+        case 'u':
+	  /* These are just backpointers, so they don't matter.  */
+	  break;
+        case '0':
+        case 't':
+	  break;
+        default:
+	  /* Err on the side of caution.  */
+          return 0;
+	}
+    }
+  return 1;
+}
+
+/* Try to use a cmove where if end else blocks are structurally equivalent
+   blocks that differ only by an input register, and possible some local
+   registers.  */
+static int
+noce_try_complex_cmove (struct ce_if_block * ce_info,
+			struct noce_if_info *if_info, rtx jump, rtx cond)
+{
+  basic_block then_bb = ce_info->then_bb;	/* THEN */
+  basic_block else_bb = ce_info->else_bb;	/* ELSE or NULL */
+  rtx xi, yi, x_end, y_end;
+  struct equiv_info info;
+  int i;
+  rtx temp;
+  rtx next;
+
+  if_info->cond = cond;
+  if_info->jump = jump;
+  info.x_block = else_bb;
+  info.y_block = then_bb;
+  info.x_input = info.y_input = NULL_RTX;
+  info.input_count = info.local_count = 0;
+  xi = first_active_insn (else_bb);
+  yi = first_active_insn (then_bb);
+  if (!xi || !yi)
+    return FALSE;
+  x_end = BB_END (else_bb);
+  y_end = prev_active_insn (BB_END (then_bb));
+  for (;;)
+    {
+      if (INSN_P (xi) != INSN_P (yi))
+	return FALSE;
+      if (INSN_P (xi))
+	{
+	  if (! struct_equiv (&PATTERN (xi), PATTERN (yi), 1, &info)
+	      || ! struct_equiv (&REG_NOTES (xi), REG_NOTES (yi), 1, &info))
+	    return 0;
+	}
+      if (xi == x_end || yi == y_end)
+	break;
+      xi = NEXT_INSN (xi);
+      yi = NEXT_INSN (yi);
+    }
+  if (xi != x_end || yi != y_end || !info.input_count)
+    return FALSE;
+  temp = gen_reg_rtx (GET_MODE (info.x_input));
+  if_info->a = info.y_input;
+  if_info->b = info.x_input;
+  if_info->x = temp;
+  if (! noce_try_cmove (if_info))
+    return FALSE;
+  will_merge_if_block (ce_info);
+  for (i = info.input_count-1; i >= 0; i--)
+    *info.input[i] = temp;
+  for (yi = BB_HEAD (then_bb); BB_HEAD (then_bb) != BB_END (then_bb); yi = next)
+    {
+      next = NEXT_INSN (yi);
+      if (INSN_P (yi))
+	next = delete_insn (yi);
+      if (yi == BB_END (then_bb) || PREV_INSN (yi) == BB_END (then_bb))
+	break;
+    }
+  delete_insn (jump);
+  merge_if_block (ce_info);
+  return TRUE;
+}
 
 /* Main entry point for all if-conversion.  */
 
@@ -3221,6 +3545,32 @@ if_convert (int x_life_data_ok)
   if (life_data_ok)
     clear_bb_flags ();
 
+  if (flag_expensive_optimizations)
+    {
+      /* For each register, note in which basic blocks it is mentioned first
+	 and last.  */
+      basic_block current_block = NULL_BLOCK;
+      rtx insn;
+
+      regno_block_max = max_reg_num ();
+      regno_block = xcalloc (regno_block_max,  sizeof *regno_block);
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+	{
+	  if (GET_CODE (insn) == NOTE
+	      && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
+	    current_block = NOTE_BASIC_BLOCK (insn);
+	  else if (INSN_P (insn))
+	    {
+	      for_each_rtx (&PATTERN (insn), &ifconvert_mark_reg_use,
+			    current_block);
+	      for_each_rtx (&REG_NOTES (insn), &ifconvert_mark_reg_use,
+			    current_block);
+	    }
+	}
+    }
+  else
+    regno_block = 0;
+
   /* Go through each of the basic blocks looking for things to convert.  If we
      have conditional execution, we make multiple passes to allow us to handle
      IF-THEN{-ELSE} blocks within other IF-THEN{-ELSE} blocks.  */
@@ -3248,6 +3598,9 @@ if_convert (int x_life_data_ok)
 #endif
     }
   while (cond_exec_changed_p);
+
+  if (regno_block)
+    free (regno_block);
 
 #ifdef IFCVT_MULTIPLE_DUMPS
   if (rtl_dump_file)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]