This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Another performance regression


On Thu, Sep 26, 2002 at 12:01:38PM -0700, Dale Johannesen wrote:
> So should I fix this by making the unrolling code smarter, in effect
> doing cse's job?

I think so, since the change to loop unrolling order was 
also done to get around a performance regression.  :-/

> It seems likely Roger Sayle's approach of running
> gcse after loop opts would Just Work.  Is that going to go in?

Not for 3.3.

The following appears to do the job.  Would you see if this
causes any other problems?


r~


	* unroll.c (simplify_cmp_and_jump_insns): New.
	(unroll_loop): Use it.  Use simplify_gen_foo+force_operand
	instead of expand_simple_foo.

Index: unroll.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/unroll.c,v
retrieving revision 1.176
diff -c -p -d -r1.176 unroll.c
*** unroll.c	8 Sep 2002 18:32:31 -0000	1.176
--- unroll.c	26 Sep 2002 22:13:46 -0000
*************** static int *splittable_regs_updates;
*** 193,198 ****
--- 193,201 ----
  
  /* Forward declarations.  */
  
+ static rtx simplify_cmp_and_jump_insns PARAMS ((enum rtx_code,
+ 						enum machine_mode,
+ 						rtx, rtx, rtx));
  static void init_reg_map PARAMS ((struct inline_remap *, int));
  static rtx calculate_giv_inc PARAMS ((rtx, rtx, unsigned int));
  static rtx initial_reg_note_copy PARAMS ((rtx, struct inline_remap *));
*************** unroll_loop (loop, insn_count, strength_
*** 843,849 ****
  			       &initial_value, &final_value, &increment,
  			       &mode))
  	{
! 	  rtx diff;
  	  rtx *labels;
  	  int abs_inc, neg_inc;
  	  enum rtx_code cc = loop_info->comparison_code;
--- 846,852 ----
  			       &initial_value, &final_value, &increment,
  			       &mode))
  	{
! 	  rtx diff, insn;
  	  rtx *labels;
  	  int abs_inc, neg_inc;
  	  enum rtx_code cc = loop_info->comparison_code;
*************** unroll_loop (loop, insn_count, strength_
*** 875,900 ****
  
  	  start_sequence ();
  
  	  /* Final value may have form of (PLUS val1 const1_rtx).  We need
  	     to convert it into general operand, so compute the real value.  */
  
! 	  if (GET_CODE (final_value) == PLUS)
! 	    {
! 	      final_value = expand_simple_binop (mode, PLUS,
! 						 copy_rtx (XEXP (final_value, 0)),
! 						 copy_rtx (XEXP (final_value, 1)),
! 						 NULL_RTX, 0, OPTAB_LIB_WIDEN);
! 	    }
  	  if (!nonmemory_operand (final_value, VOIDmode))
! 	    final_value = force_reg (mode, copy_rtx (final_value));
  
  	  /* Calculate the difference between the final and initial values.
  	     Final value may be a (plus (reg x) (const_int 1)) rtx.
- 	     Let the following cse pass simplify this if initial value is
- 	     a constant.
- 
- 	     We must copy the final and initial values here to avoid
- 	     improperly shared rtl.
  
  	     We have to deal with for (i = 0; --i < 6;) type loops.
  	     For such loops the real final value is the first time the
--- 878,897 ----
  
  	  start_sequence ();
  
+ 	  /* We must copy the final and initial values here to avoid
+ 	     improperly shared rtl.  */
+ 	  final_value = copy_rtx (final_value);
+ 	  initial_value = copy_rtx (initial_value);
+ 
  	  /* Final value may have form of (PLUS val1 const1_rtx).  We need
  	     to convert it into general operand, so compute the real value.  */
  
! 	  final_value = force_operand (final_value, NULL_RTX);
  	  if (!nonmemory_operand (final_value, VOIDmode))
! 	    final_value = force_reg (mode, final_value);
  
  	  /* Calculate the difference between the final and initial values.
  	     Final value may be a (plus (reg x) (const_int 1)) rtx.
  
  	     We have to deal with for (i = 0; --i < 6;) type loops.
  	     For such loops the real final value is the first time the
*************** unroll_loop (loop, insn_count, strength_
*** 907,924 ****
  	     so we can pretend that the overflow value is 0/~0.  */
  
  	  if (cc == NE || less_p != neg_inc)
! 	    diff = expand_simple_binop (mode, MINUS, final_value,
! 					copy_rtx (initial_value), NULL_RTX, 0,
! 					OPTAB_LIB_WIDEN);
  	  else
! 	    diff = expand_simple_unop (mode, neg_inc ? NOT : NEG,
! 				       copy_rtx (initial_value), NULL_RTX, 0);
  
  	  /* Now calculate (diff % (unroll * abs (increment))) by using an
  	     and instruction.  */
! 	  diff = expand_simple_binop (GET_MODE (diff), AND, diff,
! 				      GEN_INT (unroll_number * abs_inc - 1),
! 				      NULL_RTX, 0, OPTAB_LIB_WIDEN);
  
  	  /* Now emit a sequence of branches to jump to the proper precond
  	     loop entry point.  */
--- 904,921 ----
  	     so we can pretend that the overflow value is 0/~0.  */
  
  	  if (cc == NE || less_p != neg_inc)
! 	    diff = simplify_gen_binary (MINUS, mode, final_value,
! 					initial_value);
  	  else
! 	    diff = simplify_gen_unary (neg_inc ? NOT : NEG, mode,
! 				       initial_value, mode);
! 	  diff = force_operand (diff, NULL_RTX);
  
  	  /* Now calculate (diff % (unroll * abs (increment))) by using an
  	     and instruction.  */
! 	  diff = simplify_gen_binary (AND, mode, diff,
! 				      GEN_INT (unroll_number*abs_inc - 1));
! 	  diff = force_operand (diff, NULL_RTX);
  
  	  /* Now emit a sequence of branches to jump to the proper precond
  	     loop entry point.  */
*************** unroll_loop (loop, insn_count, strength_
*** 936,953 ****
  	  if (cc != NE)
  	    {
  	      rtx incremented_initval;
! 	      incremented_initval = expand_simple_binop (mode, PLUS,
! 							 initial_value,
! 							 increment,
! 							 NULL_RTX, 0,
! 							 OPTAB_LIB_WIDEN);
! 	      emit_cmp_and_jump_insns (incremented_initval, final_value,
! 				       less_p ? GE : LE, NULL_RTX,
! 				       mode, unsigned_p, labels[1]);
! 	      predict_insn_def (get_last_insn (), PRED_LOOP_CONDITION,
! 				TAKEN);
! 	      JUMP_LABEL (get_last_insn ()) = labels[1];
! 	      LABEL_NUSES (labels[1])++;
  	    }
  
  	  /* Assuming the unroll_number is 4, and the increment is 2, then
--- 933,954 ----
  	  if (cc != NE)
  	    {
  	      rtx incremented_initval;
! 	      enum rtx_code cmp_code;
! 
! 	      incremented_initval
! 		= simplify_gen_binary (PLUS, mode, initial_value, increment);
! 	      incremented_initval
! 		= force_operand (incremented_initval, NULL_RTX);
! 
! 	      cmp_code = (less_p
! 			  ? (unsigned_p ? GEU : GE)
! 			  : (unsigned_p ? LEU : LE));
! 
! 	      insn = simplify_cmp_and_jump_insns (cmp_code, mode,
! 						  incremented_initval,
! 						  final_value, labels[1]);
! 	      if (insn)
! 	        predict_insn_def (insn, PRED_LOOP_CONDITION, TAKEN);
  	    }
  
  	  /* Assuming the unroll_number is 4, and the increment is 2, then
*************** unroll_loop (loop, insn_count, strength_
*** 986,997 ****
  		  cmp_code = LE;
  		}
  
! 	      emit_cmp_and_jump_insns (diff, GEN_INT (abs_inc * cmp_const),
! 				       cmp_code, NULL_RTX, mode, 0, labels[i]);
! 	      JUMP_LABEL (get_last_insn ()) = labels[i];
! 	      LABEL_NUSES (labels[i])++;
! 	      predict_insn (get_last_insn (), PRED_LOOP_PRECONDITIONING,
! 			    REG_BR_PROB_BASE / (unroll_number - i));
  	    }
  
  	  /* If the increment is greater than one, then we need another branch,
--- 987,998 ----
  		  cmp_code = LE;
  		}
  
! 	      insn = simplify_cmp_and_jump_insns (cmp_code, mode, diff,
! 						  GEN_INT (abs_inc*cmp_const),
! 						  labels[i]);
! 	      if (insn)
! 	        predict_insn (insn, PRED_LOOP_PRECONDITIONING,
! 			      REG_BR_PROB_BASE / (unroll_number - i));
  	    }
  
  	  /* If the increment is greater than one, then we need another branch,
*************** unroll_loop (loop, insn_count, strength_
*** 1019,1028 ****
  		  cmp_code = GE;
  		}
  
! 	      emit_cmp_and_jump_insns (diff, GEN_INT (cmp_const), cmp_code,
! 				       NULL_RTX, mode, 0, labels[0]);
! 	      JUMP_LABEL (get_last_insn ()) = labels[0];
! 	      LABEL_NUSES (labels[0])++;
  	    }
  
  	  sequence = get_insns ();
--- 1020,1027 ----
  		  cmp_code = GE;
  		}
  
! 	      simplify_cmp_and_jump_insns (cmp_code, mode, diff,
! 					   GEN_INT (cmp_const), labels[0]);
  	    }
  
  	  sequence = get_insns ();
*************** unroll_loop (loop, insn_count, strength_
*** 1323,1328 ****
--- 1322,1364 ----
    if (map->reg_map)
      free (map->reg_map);
    free (map);
+ }
+ 
+ /* A helper function for unroll_loop.  Emit a compare and branch to
+    LABEL taken when (CODE OP0 OP1) holds, but pass the comparison through
+    the simplifier first.  If the branch turned out to be conditional,
+    return it, otherwise return NULL.  */
+ 
+ static rtx
+ simplify_cmp_and_jump_insns (code, mode, op0, op1, label)
+      enum rtx_code code;
+      enum machine_mode mode;
+      rtx op0, op1, label;
+ {
+   rtx t, insn;
+ 
+   t = simplify_relational_operation (code, mode, op0, op1);
+   if (!t)
+     {
+       enum rtx_code scode = signed_condition (code);
+       emit_cmp_and_jump_insns (op0, op1, scode, NULL_RTX, mode,
+ 			       code != scode, label);
+       insn = get_last_insn ();
+ 
+       JUMP_LABEL (insn) = label;
+       LABEL_NUSES (label) += 1;
+ 
+       return insn;
+     }
+   else if (t == const_true_rtx)
+     {
+       insn = emit_jump_insn (gen_jump (label));
+       emit_barrier ();
+       JUMP_LABEL (insn) = label;
+       LABEL_NUSES (label) += 1;
+     }
+ 
+   return NULL_RTX;
  }
  
  /* Return true if the loop can be safely, and profitably, preconditioned


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]