This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Another performance regression
- From: Richard Henderson <rth at redhat dot com>
- To: Dale Johannesen <dalej at apple dot com>
- Cc: gcc-patches at gcc dot gnu dot org, gcc at gcc dot gnu dot org
- Date: Thu, 26 Sep 2002 15:26:40 -0700
- Subject: Re: Another performance regression
- References: <62C4CADA-D182-11D6-A88A-003065C86F94@apple.com>
On Thu, Sep 26, 2002 at 12:01:38PM -0700, Dale Johannesen wrote:
> So should I fix this by making the unrolling code smarter, in effect
> doing cse's job?
I think so, since the change to loop unrolling order was
also done to get around a performance regression. :-/
> It seems likely Roger Sayle's approach of running
> gcse after loop opts would Just Work. Is that going to go in?
Not for 3.3.
The following appears to do the job. Would you see if this
causes any other problems?
r~
* unroll.c (simplify_cmp_and_jump_insns): New.
(unroll_loop): Use it. Use simplify_gen_foo+force_operand
instead of expand_simple_foo.
Index: unroll.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/unroll.c,v
retrieving revision 1.176
diff -c -p -d -r1.176 unroll.c
*** unroll.c 8 Sep 2002 18:32:31 -0000 1.176
--- unroll.c 26 Sep 2002 22:13:46 -0000
*************** static int *splittable_regs_updates;
*** 193,198 ****
--- 193,201 ----
/* Forward declarations. */
+ static rtx simplify_cmp_and_jump_insns PARAMS ((enum rtx_code,
+ enum machine_mode,
+ rtx, rtx, rtx));
static void init_reg_map PARAMS ((struct inline_remap *, int));
static rtx calculate_giv_inc PARAMS ((rtx, rtx, unsigned int));
static rtx initial_reg_note_copy PARAMS ((rtx, struct inline_remap *));
*************** unroll_loop (loop, insn_count, strength_
*** 843,849 ****
&initial_value, &final_value, &increment,
&mode))
{
! rtx diff;
rtx *labels;
int abs_inc, neg_inc;
enum rtx_code cc = loop_info->comparison_code;
--- 846,852 ----
&initial_value, &final_value, &increment,
&mode))
{
! rtx diff, insn;
rtx *labels;
int abs_inc, neg_inc;
enum rtx_code cc = loop_info->comparison_code;
*************** unroll_loop (loop, insn_count, strength_
*** 875,900 ****
start_sequence ();
/* Final value may have form of (PLUS val1 const1_rtx). We need
to convert it into general operand, so compute the real value. */
! if (GET_CODE (final_value) == PLUS)
! {
! final_value = expand_simple_binop (mode, PLUS,
! copy_rtx (XEXP (final_value, 0)),
! copy_rtx (XEXP (final_value, 1)),
! NULL_RTX, 0, OPTAB_LIB_WIDEN);
! }
if (!nonmemory_operand (final_value, VOIDmode))
! final_value = force_reg (mode, copy_rtx (final_value));
/* Calculate the difference between the final and initial values.
Final value may be a (plus (reg x) (const_int 1)) rtx.
- Let the following cse pass simplify this if initial value is
- a constant.
-
- We must copy the final and initial values here to avoid
- improperly shared rtl.
We have to deal with for (i = 0; --i < 6;) type loops.
For such loops the real final value is the first time the
--- 878,897 ----
start_sequence ();
+ /* We must copy the final and initial values here to avoid
+ improperly shared rtl. */
+ final_value = copy_rtx (final_value);
+ initial_value = copy_rtx (initial_value);
+
/* Final value may have form of (PLUS val1 const1_rtx). We need
to convert it into general operand, so compute the real value. */
! final_value = force_operand (final_value, NULL_RTX);
if (!nonmemory_operand (final_value, VOIDmode))
! final_value = force_reg (mode, final_value);
/* Calculate the difference between the final and initial values.
Final value may be a (plus (reg x) (const_int 1)) rtx.
We have to deal with for (i = 0; --i < 6;) type loops.
For such loops the real final value is the first time the
*************** unroll_loop (loop, insn_count, strength_
*** 907,924 ****
so we can pretend that the overflow value is 0/~0. */
if (cc == NE || less_p != neg_inc)
! diff = expand_simple_binop (mode, MINUS, final_value,
! copy_rtx (initial_value), NULL_RTX, 0,
! OPTAB_LIB_WIDEN);
else
! diff = expand_simple_unop (mode, neg_inc ? NOT : NEG,
! copy_rtx (initial_value), NULL_RTX, 0);
/* Now calculate (diff % (unroll * abs (increment))) by using an
and instruction. */
! diff = expand_simple_binop (GET_MODE (diff), AND, diff,
! GEN_INT (unroll_number * abs_inc - 1),
! NULL_RTX, 0, OPTAB_LIB_WIDEN);
/* Now emit a sequence of branches to jump to the proper precond
loop entry point. */
--- 904,921 ----
so we can pretend that the overflow value is 0/~0. */
if (cc == NE || less_p != neg_inc)
! diff = simplify_gen_binary (MINUS, mode, final_value,
! initial_value);
else
! diff = simplify_gen_unary (neg_inc ? NOT : NEG, mode,
! initial_value, mode);
! diff = force_operand (diff, NULL_RTX);
/* Now calculate (diff % (unroll * abs (increment))) by using an
and instruction. */
! diff = simplify_gen_binary (AND, mode, diff,
! GEN_INT (unroll_number*abs_inc - 1));
! diff = force_operand (diff, NULL_RTX);
/* Now emit a sequence of branches to jump to the proper precond
loop entry point. */
*************** unroll_loop (loop, insn_count, strength_
*** 936,953 ****
if (cc != NE)
{
rtx incremented_initval;
! incremented_initval = expand_simple_binop (mode, PLUS,
! initial_value,
! increment,
! NULL_RTX, 0,
! OPTAB_LIB_WIDEN);
! emit_cmp_and_jump_insns (incremented_initval, final_value,
! less_p ? GE : LE, NULL_RTX,
! mode, unsigned_p, labels[1]);
! predict_insn_def (get_last_insn (), PRED_LOOP_CONDITION,
! TAKEN);
! JUMP_LABEL (get_last_insn ()) = labels[1];
! LABEL_NUSES (labels[1])++;
}
/* Assuming the unroll_number is 4, and the increment is 2, then
--- 933,954 ----
if (cc != NE)
{
rtx incremented_initval;
! enum rtx_code cmp_code;
!
! incremented_initval
! = simplify_gen_binary (PLUS, mode, initial_value, increment);
! incremented_initval
! = force_operand (incremented_initval, NULL_RTX);
!
! cmp_code = (less_p
! ? (unsigned_p ? GEU : GE)
! : (unsigned_p ? LEU : LE));
!
! insn = simplify_cmp_and_jump_insns (cmp_code, mode,
! incremented_initval,
! final_value, labels[1]);
! if (insn)
! predict_insn_def (insn, PRED_LOOP_CONDITION, TAKEN);
}
/* Assuming the unroll_number is 4, and the increment is 2, then
*************** unroll_loop (loop, insn_count, strength_
*** 986,997 ****
cmp_code = LE;
}
! emit_cmp_and_jump_insns (diff, GEN_INT (abs_inc * cmp_const),
! cmp_code, NULL_RTX, mode, 0, labels[i]);
! JUMP_LABEL (get_last_insn ()) = labels[i];
! LABEL_NUSES (labels[i])++;
! predict_insn (get_last_insn (), PRED_LOOP_PRECONDITIONING,
! REG_BR_PROB_BASE / (unroll_number - i));
}
/* If the increment is greater than one, then we need another branch,
--- 987,998 ----
cmp_code = LE;
}
! insn = simplify_cmp_and_jump_insns (cmp_code, mode, diff,
! GEN_INT (abs_inc*cmp_const),
! labels[i]);
! if (insn)
! predict_insn (insn, PRED_LOOP_PRECONDITIONING,
! REG_BR_PROB_BASE / (unroll_number - i));
}
/* If the increment is greater than one, then we need another branch,
*************** unroll_loop (loop, insn_count, strength_
*** 1019,1028 ****
cmp_code = GE;
}
! emit_cmp_and_jump_insns (diff, GEN_INT (cmp_const), cmp_code,
! NULL_RTX, mode, 0, labels[0]);
! JUMP_LABEL (get_last_insn ()) = labels[0];
! LABEL_NUSES (labels[0])++;
}
sequence = get_insns ();
--- 1020,1027 ----
cmp_code = GE;
}
! simplify_cmp_and_jump_insns (cmp_code, mode, diff,
! GEN_INT (cmp_const), labels[0]);
}
sequence = get_insns ();
*************** unroll_loop (loop, insn_count, strength_
*** 1323,1328 ****
--- 1322,1364 ----
if (map->reg_map)
free (map->reg_map);
free (map);
+ }
+
+ /* A helper function for unroll_loop. Emit a compare and branch to LABEL
+ that is taken when (CODE OP0 OP1) holds, but pass this through the
+ simplifier first. If the branch turned out to be conditional, return
+ it, otherwise return NULL. */
+
+ static rtx
+ simplify_cmp_and_jump_insns (code, mode, op0, op1, label)
+ enum rtx_code code;
+ enum machine_mode mode;
+ rtx op0, op1, label;
+ {
+ rtx t, insn;
+
+ t = simplify_relational_operation (code, mode, op0, op1);
+ if (!t)
+ {
+ enum rtx_code scode = signed_condition (code);
+ emit_cmp_and_jump_insns (op0, op1, scode, NULL_RTX, mode,
+ code != scode, label);
+ insn = get_last_insn ();
+
+ JUMP_LABEL (insn) = label;
+ LABEL_NUSES (label) += 1;
+
+ return insn;
+ }
+ else if (t == const_true_rtx)
+ {
+ insn = emit_jump_insn (gen_jump (label));
+ emit_barrier ();
+ JUMP_LABEL (insn) = label;
+ LABEL_NUSES (label) += 1;
+ }
+
+ return NULL_RTX;
}
/* Return true if the loop can be safely, and profitably, preconditioned