PATCH: complete cstore support and implement for Thumb
Richard Earnshaw
Richard.Earnshaw@buzzard.freeserve.co.uk
Tue Nov 14 23:36:00 GMT 2006
A long time ago, when bernds originally added support for the cbranch
insns, he did most of the work needed to implement cstore insns as the
equivalent of the scc insns that are available through the normal
flag-setting instruction expanders.
This patch completes the support for cstore insns and adds some initial
code to exploit this for Thumb. The result is about a 0.2% reduction in code
size when optimizing for size, and the elimination of branches should
improve performance slightly too, though I have not measured this.
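
The Thumb expansion relies on a handful of classic branchless idioms for
signed comparisons against zero; here is a minimal C sketch of the LE, GE
and GT tricks used below (illustrative only, not code from the patch, and
it assumes 32-bit wrapping arithmetic and arithmetic right shifts as on
the target):

#include <assert.h>
#include <stdint.h>

static uint32_t le0 (int32_t x)        /* 1 iff x <= 0 */
{
  /* x | (x - 1) has its sign bit set iff x <= 0.  */
  return (uint32_t) (x | (x - 1)) >> 31;
}

static uint32_t ge0 (int32_t x)        /* 1 iff x >= 0 */
{
  /* ~x has its sign bit set iff x >= 0.  */
  return (uint32_t) ~x >> 31;
}

static uint32_t gt0 (int32_t x)        /* 1 iff x > 0 */
{
  /* (x >> 31) - x is negative iff x > 0.  */
  return (uint32_t) ((x >> 31) - x) >> 31;
}

int main (void)
{
  assert (le0 (-5) == 1 && le0 (0) == 1 && le0 (7) == 0);
  assert (ge0 (-5) == 0 && ge0 (0) == 1 && ge0 (7) == 1);
  assert (gt0 (-5) == 0 && gt0 (0) == 0 && gt0 (7) == 1);
  return 0;
}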
I wrote this patch during the 4.1 development phase and tested it at the
time by bootstrapping in Thumb mode. Unfortunately, I can no longer
repeat that native test because GMP does not support interworking in
its hand-coded assembly routines (a nasty gotcha of using GMP: we are
now dependent on the processor variants that it supports).
Anyway, retested on an arm-eabi cross: gcc.dg/tree-ssa/20040204-1.c moves
from XPASS to XFAIL, but this is to be expected since I've adjusted
BRANCH_COST for the Thumb instruction set.
Committed to trunk.
2006-11-14 Richard Earnshaw <rearnsha@arm.com>
* expmed.c (emit_store_flag_1): New function.
(emit_store_flag): Call it. If we can't find a suitable scc insn,
try a cstore insn.
* expr.c (do_store_flag): If we can't find a scc insn, try cstore.
Use do_compare_rtx_and_jump.
* arm.h (BRANCH_COST): Increase to 2 on Thumb.
* arm.md (cstoresi4): New define_expand.
(cstoresi_eq0_thumb, cstoresi_ne0_thumb): Likewise.
(cstoresi_eq0_thumb_insn, cstoresi_ne0_thumb_insn): New patterns.
(cstoresi_nltu_thumb, thumb_addsi3_addgeu): New patterns.
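
The new Thumb patterns all lean on the carry flag: CMP a, b sets C to
(a >= b) unsigned, NEG x sets C to (x == 0), SUB x, #1 sets C to
(x >= 1), and ADC/SBC then fold the carry back in. A minimal C model of
the three two-instruction sequences (illustrative only, not code from
the patch):

#include <assert.h>
#include <stdint.h>

static uint32_t eq0 (uint32_t x)     /* neg %0, %1; adc %0, %0, %1 */
{
  uint32_t c = (x == 0);             /* carry out of 0 - x */
  return (0u - x) + x + c;           /* collapses to just C */
}

static uint32_t ne0 (uint32_t x)     /* sub %2, %1, #1; sbc %0, %1, %2 */
{
  uint32_t c = (x >= 1);             /* carry out of x - 1 */
  return x - (x - 1) - !c;           /* collapses to just C */
}

static uint32_t nltu (uint32_t a, uint32_t b) /* cmp %1, %2; sbc %0, %0, %0 */
{
  uint32_t c = (a >= b);             /* carry out of the compare */
  return 0u - !c;                    /* 0 or -1: -(a <u b) */
}

int main (void)
{
  assert (eq0 (0) == 1 && eq0 (42) == 0);
  assert (ne0 (0) == 0 && ne0 (42) == 1);
  assert (nltu (1, 2) == 0xffffffffu && nltu (2, 1) == 0);
  return 0;
}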
*** config/arm/arm.h (revision 118864)
--- config/arm/arm.h (local)
*************** do { \
*** 2091,2097 ****
/* Try to generate sequences that don't involve branches, we can then use
conditional instructions */
#define BRANCH_COST \
! (TARGET_ARM ? 4 : (optimize > 1 ? 1 : 0))
/* Position Independent Code. */
/* We decide which register to use based on the compilation options and
--- 2091,2097 ----
/* Try to generate sequences that don't involve branches, we can then use
conditional instructions */
#define BRANCH_COST \
! (TARGET_ARM ? 4 : (optimize > 0 ? 2 : 0))
/* Position Independent Code. */
/* We decide which register to use based on the compilation options and
*** config/arm/arm.md (revision 118864)
--- config/arm/arm.md (local)
*************** (define_insn "*mov_notscc"
*** 7460,7465 ****
--- 7460,7657 ----
(set_attr "length" "8")]
)
+ (define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (match_operator:SI 1 "arm_comparison_operator"
+ [(match_operand:SI 2 "s_register_operand" "")
+ (match_operand:SI 3 "reg_or_int_operand" "")]))]
+ "TARGET_THUMB"
+ "{
+ rtx op3, scratch, scratch2;
+
+ if (operands[3] == const0_rtx)
+ {
+ switch (GET_CODE (operands[1]))
+ {
+ case EQ:
+ emit_insn (gen_cstoresi_eq0_thumb (operands[0], operands[2]));
+ break;
+
+ case NE:
+ emit_insn (gen_cstoresi_ne0_thumb (operands[0], operands[2]));
+ break;
+
+ case LE:
+ scratch = expand_binop (SImode, add_optab, operands[2], constm1_rtx,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ scratch = expand_binop (SImode, ior_optab, operands[2], scratch,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
+ operands[0], 1, OPTAB_WIDEN);
+ break;
+
+ case GE:
+ scratch = expand_unop (SImode, one_cmpl_optab, operands[2],
+ NULL_RTX, 1);
+ expand_binop (SImode, lshr_optab, scratch, GEN_INT (31),
+ operands[0], 1, OPTAB_WIDEN);
+ break;
+
+ case GT:
+ scratch = expand_binop (SImode, ashr_optab, operands[2],
+ GEN_INT (31), NULL_RTX, 0, OPTAB_WIDEN);
+ scratch = expand_binop (SImode, sub_optab, scratch, operands[2],
+ NULL_RTX, 0, OPTAB_WIDEN);
+ expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), operands[0],
+ 0, OPTAB_WIDEN);
+ break;
+
+ /* LT is handled by generic code. No need for unsigned with 0. */
+ default:
+ FAIL;
+ }
+ DONE;
+ }
+
+ switch (GET_CODE (operands[1]))
+ {
+ case EQ:
+ scratch = expand_binop (SImode, sub_optab, operands[2], operands[3],
+ NULL_RTX, 0, OPTAB_WIDEN);
+ emit_insn (gen_cstoresi_eq0_thumb (operands[0], scratch));
+ break;
+
+ case NE:
+ scratch = expand_binop (SImode, sub_optab, operands[2], operands[3],
+ NULL_RTX, 0, OPTAB_WIDEN);
+ emit_insn (gen_cstoresi_ne0_thumb (operands[0], scratch));
+ break;
+
+ case LE:
+ op3 = force_reg (SImode, operands[3]);
+
+ scratch = expand_binop (SImode, lshr_optab, operands[2], GEN_INT (31),
+ NULL_RTX, 1, OPTAB_WIDEN);
+ scratch2 = expand_binop (SImode, ashr_optab, op3, GEN_INT (31),
+ NULL_RTX, 0, OPTAB_WIDEN);
+ emit_insn (gen_thumb_addsi3_addgeu (operands[0], scratch, scratch2,
+ op3, operands[2]));
+ break;
+
+ case GE:
+ op3 = operands[3];
+ if (!thumb_cmp_operand (op3, SImode))
+ op3 = force_reg (SImode, op3);
+ scratch = expand_binop (SImode, ashr_optab, operands[2], GEN_INT (31),
+ NULL_RTX, 0, OPTAB_WIDEN);
+ scratch2 = expand_binop (SImode, lshr_optab, op3, GEN_INT (31),
+ NULL_RTX, 1, OPTAB_WIDEN);
+ emit_insn (gen_thumb_addsi3_addgeu (operands[0], scratch, scratch2,
+ operands[2], op3));
+ break;
+
+ case LEU:
+ op3 = force_reg (SImode, operands[3]);
+ scratch = force_reg (SImode, const0_rtx);
+ emit_insn (gen_thumb_addsi3_addgeu (operands[0], scratch, scratch,
+ op3, operands[2]));
+ break;
+
+ case GEU:
+ op3 = operands[3];
+ if (!thumb_cmp_operand (op3, SImode))
+ op3 = force_reg (SImode, op3);
+ scratch = force_reg (SImode, const0_rtx);
+ emit_insn (gen_thumb_addsi3_addgeu (operands[0], scratch, scratch,
+ operands[2], op3));
+ break;
+
+ case LTU:
+ op3 = operands[3];
+ if (!thumb_cmp_operand (op3, SImode))
+ op3 = force_reg (SImode, op3);
+ scratch = gen_reg_rtx (SImode);
+ emit_insn (gen_cstoresi_nltu_thumb (scratch, operands[2], op3));
+ emit_insn (gen_negsi2 (operands[0], scratch));
+ break;
+
+ case GTU:
+ op3 = force_reg (SImode, operands[3]);
+ scratch = gen_reg_rtx (SImode);
+ emit_insn (gen_cstoresi_nltu_thumb (scratch, op3, operands[2]));
+ emit_insn (gen_negsi2 (operands[0], scratch));
+ break;
+
+ /* No good sequences for GT, LT. */
+ default:
+ FAIL;
+ }
+ DONE;
+ }")
+
+ (define_expand "cstoresi_eq0_thumb"
+ [(parallel
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (eq:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (match_dup:SI 2))])]
+ "TARGET_THUMB"
+ "operands[2] = gen_reg_rtx (SImode);"
+ )
+
+ (define_expand "cstoresi_ne0_thumb"
+ [(parallel
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (ne:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (match_dup:SI 2))])]
+ "TARGET_THUMB"
+ "operands[2] = gen_reg_rtx (SImode);"
+ )
+
+ (define_insn "*cstoresi_eq0_thumb_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=&l,l")
+ (eq:SI (match_operand:SI 1 "s_register_operand" "l,0")
+ (const_int 0)))
+ (clobber (match_operand:SI 2 "s_register_operand" "=X,l"))]
+ "TARGET_THUMB"
+ "@
+ neg\\t%0, %1\;adc\\t%0, %0, %1
+ neg\\t%2, %1\;adc\\t%0, %1, %2"
+ [(set_attr "length" "4")]
+ )
+
+ (define_insn "*cstoresi_ne0_thumb_insn"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (ne:SI (match_operand:SI 1 "s_register_operand" "0")
+ (const_int 0)))
+ (clobber (match_operand:SI 2 "s_register_operand" "=l"))]
+ "TARGET_THUMB"
+ "sub\\t%2, %1, #1\;sbc\\t%0, %1, %2"
+ [(set_attr "length" "4")]
+ )
+
+ (define_insn "cstoresi_nltu_thumb"
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l")
+ (neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h")
+ (match_operand:SI 2 "thumb_cmp_operand" "lI*h,*r"))))]
+ "TARGET_THUMB"
+ "cmp\\t%1, %2\;sbc\\t%0, %0, %0"
+ [(set_attr "length" "4")]
+ )
+
+ ;; Used as part of the expansion of the Thumb LE, GE, LEU and GEU sequences.
+ (define_insn "thumb_addsi3_addgeu"
+ [(set (match_operand:SI 0 "s_register_operand" "=l")
+ (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%0")
+ (match_operand:SI 2 "s_register_operand" "l"))
+ (geu:SI (match_operand:SI 3 "s_register_operand" "l")
+ (match_operand:SI 4 "thumb_cmp_operand" "lI"))))]
+ "TARGET_THUMB"
+ "cmp\\t%3, %4\;adc\\t%0, %1, %2"
+ [(set_attr "length" "4")]
+ )
+
;; Conditional move insns
*** expmed.c (revision 118864)
--- expmed.c (local)
*************** expand_and (enum machine_mode mode, rtx
*** 5091,5096 ****
--- 5091,5167 ----
return target;
}
+ /* Helper function for emit_store_flag. */
+ static rtx
+ emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
+ int normalizep)
+ {
+ rtx op0;
+ enum machine_mode target_mode = GET_MODE (target);
+
+ /* If we are converting to a wider mode, first convert to
+ TARGET_MODE, then normalize. This produces better combining
+ opportunities on machines that have a SIGN_EXTRACT when we are
+ testing a single bit. This mostly benefits the 68k.
+
+ If STORE_FLAG_VALUE does not have the sign bit set when
+ interpreted in MODE, we can do this conversion as unsigned, which
+ is usually more efficient. */
+ if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
+ {
+ convert_move (target, subtarget,
+ (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
+ && 0 == (STORE_FLAG_VALUE
+ & ((HOST_WIDE_INT) 1
+ << (GET_MODE_BITSIZE (mode) -1))));
+ op0 = target;
+ mode = target_mode;
+ }
+ else
+ op0 = subtarget;
+
+ /* If we want to keep subexpressions around, don't reuse our last
+ target. */
+ if (optimize)
+ subtarget = 0;
+
+ /* Now normalize to the proper value in MODE. Sometimes we don't
+ have to do anything. */
+ if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
+ ;
+ /* STORE_FLAG_VALUE might be the most negative number, so write
+ the comparison this way to avoid a compile-time warning. */
+ else if (- normalizep == STORE_FLAG_VALUE)
+ op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
+
+ /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
+ it hard to use a value of just the sign bit due to ANSI integer
+ constant typing rules. */
+ else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
+ && (STORE_FLAG_VALUE
+ & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
+ op0 = expand_shift (RSHIFT_EXPR, mode, op0,
+ size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
+ normalizep == 1);
+ else
+ {
+ gcc_assert (STORE_FLAG_VALUE & 1);
+
+ op0 = expand_and (mode, op0, const1_rtx, subtarget);
+ if (normalizep == -1)
+ op0 = expand_unop (mode, neg_optab, op0, op0, 0);
+ }
+
+ /* If we were converting to a smaller mode, do the conversion now. */
+ if (target_mode != mode)
+ {
+ convert_move (target, op0, 0);
+ return target;
+ }
+ else
+ return op0;
+ }
+
/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
and storing in TARGET. Normally return TARGET.
Return 0 if that cannot be done.
*************** emit_store_flag (rtx target, enum rtx_co
*** 5180,5191 ****
{
rtx op00, op01, op0both;
! /* Do a logical OR or AND of the two words and compare the result. */
op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
op0both = expand_binop (word_mode,
op1 == const0_rtx ? ior_optab : and_optab,
! op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
if (op0both != 0)
return emit_store_flag (target, code, op0both, op1, word_mode,
--- 5251,5264 ----
{
rtx op00, op01, op0both;
! /* Do a logical OR or AND of the two words and compare the
! result. */
op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
op0both = expand_binop (word_mode,
op1 == const0_rtx ? ior_optab : and_optab,
! op00, op01, NULL_RTX, unsignedp,
! OPTAB_DIRECT);
if (op0both != 0)
return emit_store_flag (target, code, op0both, op1, word_mode,
*************** emit_store_flag (rtx target, enum rtx_co
*** 5197,5211 ****
/* If testing the sign bit, can just test on high word. */
op0h = simplify_gen_subreg (word_mode, op0, mode,
! subreg_highpart_offset (word_mode, mode));
return emit_store_flag (target, code, op0h, op1, word_mode,
unsignedp, normalizep);
}
}
- /* From now on, we won't change CODE, so set ICODE now. */
- icode = setcc_gen_code[(int) code];
-
/* If this is A < 0 or A >= 0, we can do this by taking the ones
complement of A (for GE) and shifting the sign bit to the low bit. */
if (op1 == const0_rtx && (code == LT || code == GE)
--- 5270,5282 ----
/* If testing the sign bit, can just test on high word. */
op0h = simplify_gen_subreg (word_mode, op0, mode,
! subreg_highpart_offset (word_mode,
! mode));
return emit_store_flag (target, code, op0h, op1, word_mode,
unsignedp, normalizep);
}
}
/* If this is A < 0 or A >= 0, we can do this by taking the ones
complement of A (for GE) and shifting the sign bit to the low bit. */
if (op1 == const0_rtx && (code == LT || code == GE)
*************** emit_store_flag (rtx target, enum rtx_co
*** 5213,5219 ****
&& (normalizep || STORE_FLAG_VALUE == 1
|| (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
&& ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
! == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
{
subtarget = target;
--- 5284,5291 ----
&& (normalizep || STORE_FLAG_VALUE == 1
|| (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
&& ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
! == ((unsigned HOST_WIDE_INT) 1
! << (GET_MODE_BITSIZE (mode) - 1))))))
{
subtarget = target;
*************** emit_store_flag (rtx target, enum rtx_co
*** 5248,5253 ****
--- 5320,5327 ----
return op0;
}
+ icode = setcc_gen_code[(int) code];
+
if (icode != CODE_FOR_nothing)
{
insn_operand_predicate_fn pred;
*************** emit_store_flag (rtx target, enum rtx_co
*** 5305,5376 ****
if (pattern)
{
emit_insn (pattern);
! /* If we are converting to a wider mode, first convert to
! TARGET_MODE, then normalize. This produces better combining
! opportunities on machines that have a SIGN_EXTRACT when we are
! testing a single bit. This mostly benefits the 68k.
!
! If STORE_FLAG_VALUE does not have the sign bit set when
! interpreted in COMPARE_MODE, we can do this conversion as
! unsigned, which is usually more efficient. */
! if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
! {
! convert_move (target, subtarget,
! (GET_MODE_BITSIZE (compare_mode)
! <= HOST_BITS_PER_WIDE_INT)
! && 0 == (STORE_FLAG_VALUE
! & ((HOST_WIDE_INT) 1
! << (GET_MODE_BITSIZE (compare_mode) -1))));
! op0 = target;
! compare_mode = target_mode;
! }
! else
! op0 = subtarget;
! /* If we want to keep subexpressions around, don't reuse our
! last target. */
! if (optimize)
! subtarget = 0;
! /* Now normalize to the proper value in COMPARE_MODE. Sometimes
! we don't have to do anything. */
! if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
! ;
! /* STORE_FLAG_VALUE might be the most negative number, so write
! the comparison this way to avoid a compiler-time warning. */
! else if (- normalizep == STORE_FLAG_VALUE)
! op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
!
! /* We don't want to use STORE_FLAG_VALUE < 0 below since this
! makes it hard to use a value of just the sign bit due to
! ANSI integer constant typing rules. */
! else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
! && (STORE_FLAG_VALUE
! & ((HOST_WIDE_INT) 1
! << (GET_MODE_BITSIZE (compare_mode) - 1))))
! op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
! size_int (GET_MODE_BITSIZE (compare_mode) - 1),
! subtarget, normalizep == 1);
! else
{
! gcc_assert (STORE_FLAG_VALUE & 1);
!
! op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
! if (normalizep == -1)
! op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
}
! /* If we were converting to a smaller mode, do the
! conversion now. */
! if (target_mode != compare_mode)
{
! convert_move (target, op0, 0);
! return target;
}
- else
- return op0;
}
}
--- 5379,5443 ----
if (pattern)
{
emit_insn (pattern);
+ return emit_store_flag_1 (target, subtarget, compare_mode,
+ normalizep);
+ }
+ }
+ else
+ {
+ /* We don't have an scc insn, so try a cstore insn. */
! for (compare_mode = mode; compare_mode != VOIDmode;
! compare_mode = GET_MODE_WIDER_MODE (compare_mode))
! {
! icode = cstore_optab->handlers[(int) compare_mode].insn_code;
! if (icode != CODE_FOR_nothing)
! break;
! }
! if (icode != CODE_FOR_nothing)
! {
! enum machine_mode result_mode
! = insn_data[(int) icode].operand[0].mode;
! rtx cstore_op0 = op0;
! rtx cstore_op1 = op1;
! do_pending_stack_adjust ();
! last = get_last_insn ();
! if (compare_mode != mode)
{
! cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
! unsignedp);
! cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
! unsignedp);
}
+
+ if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
+ compare_mode))
+ cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
+
+ if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
+ compare_mode))
+ cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
+
+ comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
+ cstore_op1);
+ subtarget = target;
+
+ if (optimize || !(insn_data[(int) icode].operand[0].predicate
+ (subtarget, result_mode)))
+ subtarget = gen_reg_rtx (result_mode);
! pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
! cstore_op1);
!
! if (pattern)
{
! emit_insn (pattern);
! return emit_store_flag_1 (target, subtarget, result_mode,
! normalizep);
}
}
}
*** expr.c (revision 118864)
--- expr.c (local)
*************** do_store_flag (tree exp, rtx target, enu
*** 9155,9160 ****
--- 9155,9171 ----
return 0;
icode = setcc_gen_code[(int) code];
+
+ if (icode == CODE_FOR_nothing)
+ {
+ enum machine_mode wmode;
+
+ for (wmode = operand_mode;
+ icode == CODE_FOR_nothing && wmode != VOIDmode;
+ wmode = GET_MODE_WIDER_MODE (wmode))
+ icode = cstore_optab->handlers[(int) wmode].insn_code;
+ }
+
if (icode == CODE_FOR_nothing
|| (only_cheap && insn_data[(int) icode].operand[0].mode != mode))
{
*************** do_store_flag (tree exp, rtx target, enu
*** 9200,9224 ****
target = gen_reg_rtx (GET_MODE (target));
emit_move_insn (target, invert ? const0_rtx : const1_rtx);
- result = compare_from_rtx (op0, op1, code, unsignedp,
- operand_mode, NULL_RTX);
- if (GET_CODE (result) == CONST_INT)
- return (((result == const0_rtx && ! invert)
- || (result != const0_rtx && invert))
- ? const0_rtx : const1_rtx);
-
- /* The code of RESULT may not match CODE if compare_from_rtx
- decided to swap its operands and reverse the original code.
-
- We know that compare_from_rtx returns either a CONST_INT or
- a new comparison code, so it is safe to just extract the
- code from RESULT. */
- code = GET_CODE (result);
-
label = gen_label_rtx ();
! gcc_assert (bcc_gen_fctn[(int) code]);
!
! emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
emit_move_insn (target, invert ? const1_rtx : const0_rtx);
emit_label (label);
--- 9211,9220 ----
target = gen_reg_rtx (GET_MODE (target));
emit_move_insn (target, invert ? const0_rtx : const1_rtx);
label = gen_label_rtx ();
! do_compare_rtx_and_jump (op0, op1, code, unsignedp, operand_mode, NULL_RTX,
! NULL_RTX, label);
!
emit_move_insn (target, invert ? const1_rtx : const0_rtx);
emit_label (label);
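For illustration, a simplified C model of the normalization that the
factored-out emit_store_flag_1 performs (a sketch, not the GCC code:
sfv stands in for STORE_FLAG_VALUE, and op0 arrives holding either 0 or
sfv; we want either 0 or normalizep):

#include <assert.h>
#include <stdint.h>

static int32_t normalize (int32_t op0, int32_t sfv, int normalizep)
{
  if (normalizep == 0 || normalizep == sfv)
    return op0;                       /* nothing to do */

  if (-normalizep == sfv)
    return -op0;                      /* e.g. sfv == -1, want 0/1 */

  if (sfv & INT32_MIN)                /* sfv has the sign bit set */
    return normalizep == 1
	   ? (int32_t) ((uint32_t) op0 >> 31)  /* logical shift: 0/1 */
	   : op0 >> 31;                /* arithmetic shift: 0/-1 */

  /* Otherwise sfv must be odd: isolate bit 0, then maybe negate.  */
  assert (sfv & 1);
  op0 &= 1;
  return normalizep == -1 ? -op0 : op0;
}

int main (void)
{
  assert (normalize (-1, -1, 1) == 1);
  assert (normalize (INT32_MIN, INT32_MIN, 1) == 1);
  assert (normalize (INT32_MIN, INT32_MIN, -1) == -1);
  assert (normalize (5, 5, -1) == -1);
  return 0;
}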