This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[csl-arm-branch]: Better bypasses for 1026 and 1136 processors
- From: Nathan Sidwell <nathan at codesourcery dot com>
- To: Richard Earnshaw <rearnsha at arm dot com>
- Cc: gcc-patches at gcc dot gnu dot org, Paul Brook <paul at codesourcery dot com>
- Date: Tue, 16 Dec 2003 19:15:04 +0000
- Subject: [csl-arm-branch]: Better bypasses for 1026 and 1136 processors
- Organization: Codesourcery LLC
Richard,
This patch implements the alu, alu_shift and alu_shift_reg instruction type
attributes we discussed. All seems to be good with this. I also realized
I had not tested the previous patch properly, which I have now done with this
one; hence the more robust scheduler predicates.
Built and tested with an arm-unknown-linux cross compiler.
nathan
--
Nathan Sidwell :: http://www.codesourcery.com :: CodeSourcery LLC
The voices in my head said this was stupid too
nathan@codesourcery.com :: http://www.planetfall.pwp.blueyonder.co.uk
2003-12-16 Nathan Sidwell <nathan@codesourcery.com>
* config/arm/arm-protos.h (arm_no_early_alu_shift_value_dep): Declare.
* config/arm/arm.c (arm_adjust_cost): Check shift cost for
TYPE_ALU_SHIFT and TYPE_ALU_SHIFT_REG.
(arm_no_early_store_addr_dep, arm_no_early_alu_shift_dep,
arm_no_early_mul_dep): Correctly deal with conditional execution,
parallels and single shift operations.
(arm_no_early_alu_shift_value_dep): Define.
* config/arm/arm.md (attr type): Replace 'normal' with 'alu',
'alu_shift' and 'alu_shift_reg'.
(attr core_cycles): Adjust.
(*addsi3_carryin_shift, andsi_not_shiftsi_si, *arm_shiftsi3,
*shiftsi3_compare0, *notsi_shiftsi, *notsi_shiftsi_compare0,
*not_shiftsi_compare0_scratch, *cmpsi_shiftsi, *cmpsi_shiftsi_swp,
*cmpsi_neg_shiftsi, *arith_shiftsi, *arith_shiftsi_compare0,
*arith_shiftsi_compare0_scratch, *sub_shiftsi,
*sub_shiftsi_compare0, *sub_shiftsi_compare0_scratch,
*if_shift_move, *if_move_shift, *if_shift_shift): Set type
attribute appropriately.
* config/arm/arm1026ejs.md (alu_op): Adjust.
(alu_shift_op, alu_shift_reg_op): New.
* config/arm/arm1136jfs.md: Add better bypasses for early
registers. Remove load[234] and store[234] bypasses.
(11_alu_op): Adjust.
(11_alu_shift_op, 11_alu_shift_reg_op): New.
Index: config/arm/arm-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm-protos.h,v
retrieving revision 1.60.4.1
diff -c -3 -p -r1.60.4.1 arm-protos.h
*** config/arm/arm-protos.h 15 Dec 2003 14:36:23 -0000 1.60.4.1
--- config/arm/arm-protos.h 16 Dec 2003 19:08:47 -0000
*************** extern int cirrus_shift_const (rtx, enum
*** 97,102 ****
--- 97,103 ----
extern int cirrus_memory_offset (rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx);
+ extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx);
extern int symbol_mentioned_p (rtx);
Index: config/arm/arm.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.303.2.5
diff -c -3 -p -r1.303.2.5 arm.c
*** config/arm/arm.c 15 Dec 2003 14:36:26 -0000 1.303.2.5
--- config/arm/arm.c 16 Dec 2003 19:09:18 -0000
*************** arm_adjust_cost (rtx insn, rtx link, rtx
*** 3589,3595 ****
operand for INSN. If we have a shifted input operand and the
instruction we depend on is another ALU instruction, then we may
have to account for an additional stall. */
! if (shift_opnum != 0 && attr_type == TYPE_NORMAL)
{
rtx shifted_operand;
int opno;
--- 3589,3596 ----
operand for INSN. If we have a shifted input operand and the
instruction we depend on is another ALU instruction, then we may
have to account for an additional stall. */
! if (shift_opnum != 0
! && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
{
rtx shifted_operand;
int opno;
*************** arm_output_load_gr (operands)
*** 13225,13248 ****
int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
! rtx value = XEXP (PATTERN (producer), 0);
! rtx addr = XEXP (PATTERN (consumer), 0);
return !reg_overlap_mentioned_p (value, addr);
}
/* Return non-zero if the CONSUMER instruction (an ALU op) does not
! have an early register shift dependency on the result of
! PRODUCER. */
int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
! rtx value = XEXP (PATTERN (producer), 0);
! rtx early_op = XEXP (XEXP (PATTERN (consumer), 1), 0);
! return (GET_CODE (early_op) != MULT
! || !reg_overlap_mentioned_p (value, early_op));
}
/* Return non-zero if the CONSUMER (a mul or mac op) does not
--- 13226,13311 ----
int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
! rtx value = PATTERN (producer);
! rtx addr = PATTERN (consumer);
+ if (GET_CODE (value) == COND_EXEC)
+ value = XEXP (value, 1);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (addr) == COND_EXEC)
+ addr = XEXP (addr, 1);
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+ addr = XEXP (addr, 0);
+
return !reg_overlap_mentioned_p (value, addr);
}
/* Return non-zero if the CONSUMER instruction (an ALU op) does not
! have an early register shift value or amount dependency on the
! result of PRODUCER. */
int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
! rtx value = PATTERN (producer);
! rtx op = PATTERN (consumer);
! rtx early_op;
!
! if (GET_CODE (value) == COND_EXEC)
! value = XEXP (value, 1);
! if (GET_CODE (value) == PARALLEL)
! value = XVECEXP (value, 0, 0);
! value = XEXP (value, 0);
! if (GET_CODE (op) == COND_EXEC)
! op = XEXP (op, 1);
! if (GET_CODE (op) == PARALLEL)
! op = XVECEXP (op, 0, 0);
! op = XEXP (op, 1);
!
! early_op = XEXP (op, 0);
! /* This is either an actual independent shift, or a shift applied to
! the first operand of another operation. We want the whole shift
! operation. */
! if (GET_CODE (early_op) == REG)
! early_op = op;
! return !reg_overlap_mentioned_p (value, early_op);
! }
!
! /* Return non-zero if the CONSUMER instruction (an ALU op) does not
! have an early register shift value dependency on the result of
! PRODUCER. */
!
! int
! arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
! {
! rtx value = PATTERN (producer);
! rtx op = PATTERN (consumer);
! rtx early_op;
!
! if (GET_CODE (value) == COND_EXEC)
! value = XEXP (value, 1);
! if (GET_CODE (value) == PARALLEL)
! value = XVECEXP (value, 0, 0);
! value = XEXP (value, 0);
! if (GET_CODE (op) == COND_EXEC)
! op = XEXP (op, 1);
! if (GET_CODE (op) == PARALLEL)
! op = XVECEXP (op, 0, 0);
! op = XEXP (op, 1);
!
! early_op = XEXP (op, 0);
!
! /* This is either an actual independent shift, or a shift applied to
! the first operand of another operation. We want the value being
! shifted, in either case. */
! if (GET_CODE (early_op) != REG)
! early_op = XEXP (early_op, 0);
!
! return !reg_overlap_mentioned_p (value, early_op);
}
/* Return non-zero if the CONSUMER (a mul or mac op) does not
*************** arm_no_early_alu_shift_dep (rtx producer
*** 13252,13261 ****
int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
! rtx value = XEXP (PATTERN (producer), 0);
! rtx early_ops = XEXP (PATTERN (consumer), 1);
! return (GET_CODE (early_ops) == PLUS
! && !reg_overlap_mentioned_p (value, XEXP (early_ops, 0)));
}
--- 13315,13335 ----
int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
! rtx value = PATTERN (producer);
! rtx op = PATTERN (consumer);
! if (GET_CODE (value) == COND_EXEC)
! value = XEXP (value, 1);
! if (GET_CODE (value) == PARALLEL)
! value = XVECEXP (value, 0, 0);
! value = XEXP (value, 0);
! if (GET_CODE (op) == COND_EXEC)
! op = XEXP (op, 1);
! if (GET_CODE (op) == PARALLEL)
! op = XVECEXP (op, 0, 0);
! op = XEXP (op, 1);
!
! return (GET_CODE (op) == PLUS
! && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
}
Index: config/arm/arm.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.md,v
retrieving revision 1.145.2.4
diff -c -3 -p -r1.145.2.4 arm.md
*** config/arm/arm.md 10 Dec 2003 12:16:04 -0000 1.145.2.4
--- config/arm/arm.md 16 Dec 2003 19:09:41 -0000
***************
*** 214,224 ****
; mav_dmult Double multiplies (7 cycle)
;
(define_attr "type"
! "normal,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,float_em,f_load,f_store,f_mem_r,r_mem_f,f_2_r,r_2_f,branch,call,load,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult"
(if_then_else
(eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
(const_string "mult")
! (const_string "normal")))
; Load scheduling, set from the arm_ld_sched variable
; initialized by arm_override_options()
--- 214,224 ----
; mav_dmult Double multiplies (7 cycle)
;
(define_attr "type"
! "alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,float_em,f_load,f_store,f_mem_r,r_mem_f,f_2_r,r_2_f,branch,call,load,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult"
(if_then_else
(eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
(const_string "mult")
! (const_string "alu")))
; Load scheduling, set from the arm_ld_sched variable
; initialized by arm_override_options()
***************
*** 274,280 ****
; than one on the main cpu execution unit.
(define_attr "core_cycles" "single,multi"
(if_then_else (eq_attr "type"
! "normal,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith")
(const_string "single")
(const_string "multi")))
--- 274,280 ----
; than one on the main cpu execution unit.
(define_attr "core_cycles" "single,multi"
(if_then_else (eq_attr "type"
! "alu,alu_shift,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith")
(const_string "single")
(const_string "multi")))
***************
*** 727,733 ****
(match_operand:SI 1 "s_register_operand" ""))))]
"TARGET_ARM"
"adc%?\\t%0, %1, %3%S2"
! [(set_attr "conds" "use")]
)
(define_insn "*addsi3_carryin_alt1"
--- 727,736 ----
(match_operand:SI 1 "s_register_operand" ""))))]
"TARGET_ARM"
"adc%?\\t%0, %1, %3%S2"
! [(set_attr "conds" "use")
! (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*addsi3_carryin_alt1"
***************
*** 1925,1931 ****
"bic%?\\t%0, %1, %2%S4"
[(set_attr "predicable" "yes")
(set_attr "shift" "2")
! ]
)
(define_insn "*andsi_notsi_si_compare0"
--- 1928,1936 ----
"bic%?\\t%0, %1, %2%S4"
[(set_attr "predicable" "yes")
(set_attr "shift" "2")
! (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*andsi_notsi_si_compare0"
***************
*** 2531,2537 ****
"mov%?\\t%0, %1%S3"
[(set_attr "predicable" "yes")
(set_attr "shift" "1")
! ]
)
(define_insn "*shiftsi3_compare0"
--- 2536,2544 ----
"mov%?\\t%0, %1%S3"
[(set_attr "predicable" "yes")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*shiftsi3_compare0"
***************
*** 2546,2552 ****
"mov%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! ]
)
(define_insn "*shiftsi3_compare0_scratch"
--- 2553,2561 ----
"mov%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*shiftsi3_compare0_scratch"
***************
*** 2559,2566 ****
"TARGET_ARM"
"mov%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
! (set_attr "shift" "1")
! ]
)
(define_insn "*notsi_shiftsi"
--- 2568,2574 ----
"TARGET_ARM"
"mov%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
! (set_attr "shift" "1")]
)
(define_insn "*notsi_shiftsi"
***************
*** 2572,2578 ****
"mvn%?\\t%0, %1%S3"
[(set_attr "predicable" "yes")
(set_attr "shift" "1")
! ]
)
(define_insn "*notsi_shiftsi_compare0"
--- 2580,2588 ----
"mvn%?\\t%0, %1%S3"
[(set_attr "predicable" "yes")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*notsi_shiftsi_compare0"
***************
*** 2587,2593 ****
"mvn%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! ]
)
(define_insn "*not_shiftsi_compare0_scratch"
--- 2597,2605 ----
"mvn%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*not_shiftsi_compare0_scratch"
***************
*** 2601,2607 ****
"mvn%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! ]
)
;; We don't really have extzv, but defining this using shifts helps
--- 2613,2621 ----
"mvn%?s\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
;; We don't really have extzv, but defining this using shifts helps
***************
*** 6305,6311 ****
"cmp%?\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! ]
)
(define_insn "*cmpsi_shiftsi_swp"
--- 6319,6327 ----
"cmp%?\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*cmpsi_shiftsi_swp"
***************
*** 6318,6324 ****
"cmp%?\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! ]
)
(define_insn "*cmpsi_neg_shiftsi"
--- 6334,6342 ----
"cmp%?\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*cmpsi_neg_shiftsi"
***************
*** 6331,6337 ****
"cmn%?\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! ]
)
;; Cirrus SF compare instruction
--- 6349,6357 ----
"cmn%?\\t%0, %1%S3"
[(set_attr "conds" "set")
(set_attr "shift" "1")
! (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
;; Cirrus SF compare instruction
***************
*** 7490,7496 ****
"%i1%?\\t%0, %2, %4%S3"
[(set_attr "predicable" "yes")
(set_attr "shift" "4")
! ]
)
(define_insn "*arith_shiftsi_compare0"
--- 7510,7518 ----
"%i1%?\\t%0, %2, %4%S3"
[(set_attr "predicable" "yes")
(set_attr "shift" "4")
! (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*arith_shiftsi_compare0"
***************
*** 7508,7514 ****
"%i1%?s\\t%0, %2, %4%S3"
[(set_attr "conds" "set")
(set_attr "shift" "4")
! ]
)
(define_insn "*arith_shiftsi_compare0_scratch"
--- 7530,7538 ----
"%i1%?s\\t%0, %2, %4%S3"
[(set_attr "conds" "set")
(set_attr "shift" "4")
! (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*arith_shiftsi_compare0_scratch"
***************
*** 7524,7530 ****
"%i1%?s\\t%0, %2, %4%S3"
[(set_attr "conds" "set")
(set_attr "shift" "4")
! ]
)
(define_insn "*sub_shiftsi"
--- 7548,7556 ----
"%i1%?s\\t%0, %2, %4%S3"
[(set_attr "conds" "set")
(set_attr "shift" "4")
! (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*sub_shiftsi"
***************
*** 7537,7543 ****
"sub%?\\t%0, %1, %3%S2"
[(set_attr "predicable" "yes")
(set_attr "shift" "3")
! ]
)
(define_insn "*sub_shiftsi_compare0"
--- 7563,7571 ----
"sub%?\\t%0, %1, %3%S2"
[(set_attr "predicable" "yes")
(set_attr "shift" "3")
! (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*sub_shiftsi_compare0"
***************
*** 7554,7561 ****
"TARGET_ARM"
"sub%?s\\t%0, %1, %3%S2"
[(set_attr "conds" "set")
! (set_attr "shift" "3")
! ]
)
(define_insn "*sub_shiftsi_compare0_scratch"
--- 7582,7591 ----
"TARGET_ARM"
"sub%?s\\t%0, %1, %3%S2"
[(set_attr "conds" "set")
! (set_attr "shift" "3")
! (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*sub_shiftsi_compare0_scratch"
***************
*** 7570,7577 ****
"TARGET_ARM"
"sub%?s\\t%0, %1, %3%S2"
[(set_attr "conds" "set")
! (set_attr "shift" "3")
! ]
)
--- 7600,7609 ----
"TARGET_ARM"
"sub%?s\\t%0, %1, %3%S2"
[(set_attr "conds" "set")
! (set_attr "shift" "3")
! (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
***************
*** 8406,8412 ****
mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
[(set_attr "conds" "use")
(set_attr "shift" "2")
! (set_attr "length" "4,8,8")]
)
(define_insn "*ifcompare_move_shift"
--- 8438,8447 ----
mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
[(set_attr "conds" "use")
(set_attr "shift" "2")
! (set_attr "length" "4,8,8")
! (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*ifcompare_move_shift"
***************
*** 8442,8448 ****
mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
[(set_attr "conds" "use")
(set_attr "shift" "2")
! (set_attr "length" "4,8,8")]
)
(define_insn "*ifcompare_shift_shift"
--- 8477,8486 ----
mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
[(set_attr "conds" "use")
(set_attr "shift" "2")
! (set_attr "length" "4,8,8")
! (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*ifcompare_shift_shift"
***************
*** 8479,8485 ****
"mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
[(set_attr "conds" "use")
(set_attr "shift" "1")
! (set_attr "length" "8")]
)
(define_insn "*ifcompare_not_arith"
--- 8517,8528 ----
"mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
[(set_attr "conds" "use")
(set_attr "shift" "1")
! (set_attr "length" "8")
! (set (attr "type") (if_then_else
! (and (match_operand 2 "const_int_operand" "")
! (match_operand 4 "const_int_operand" ""))
! (const_string "alu_shift")
! (const_string "alu_shift_reg")))]
)
(define_insn "*ifcompare_not_arith"
Index: config/arm/arm1026ejs.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/Attic/arm1026ejs.md,v
retrieving revision 1.1.2.3
diff -c -3 -p -r1.1.2.3 arm1026ejs.md
*** config/arm/arm1026ejs.md 15 Dec 2003 14:36:29 -0000 1.1.2.3
--- config/arm/arm1026ejs.md 16 Dec 2003 19:09:41 -0000
***************
*** 64,73 ****
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
(define_insn_reservation "alu_op" 1
(and (eq_attr "tune" "arm1026ejs")
! (eq_attr "type" "normal"))
"a_e,a_m,a_w")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
--- 64,89 ----
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
+ ;; ALU operations with no shifted operand
(define_insn_reservation "alu_op" 1
(and (eq_attr "tune" "arm1026ejs")
! (eq_attr "type" "alu"))
"a_e,a_m,a_w")
+
+ ;; ALU operations with a shift-by-constant operand
+ (define_insn_reservation "alu_shift_op" 1
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "alu_shift"))
+ "a_e,a_m,a_w")
+
+ ;; ALU operations with a shift-by-register operand
+ ;; These really stall in the decoder, in order to read
+ ;; the shift value in a second cycle. Pretend we take two cycles in
+ ;; the execute stage.
+ (define_insn_reservation "alu_shift_reg_op" 2
+ (and (eq_attr "tune" "arm1026ejs")
+ (eq_attr "type" "alu_shift_reg"))
+ "a_e*2,a_m,a_w")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
Index: config/arm/arm1136jfs.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/Attic/arm1136jfs.md,v
retrieving revision 1.1.2.2
diff -c -3 -p -r1.1.2.2 arm1136jfs.md
*** config/arm/arm1136jfs.md 15 Dec 2003 14:36:29 -0000 1.1.2.2
--- config/arm/arm1136jfs.md 16 Dec 2003 19:09:42 -0000
***************
*** 73,86 ****
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modelled here.
(define_insn_reservation "11_alu_op" 2
(and (eq_attr "tune" "arm1136js,arm1136jfs")
! (eq_attr "type" "normal"))
"e_1,e_2,e_3,e_wb")
;; alu_ops can start sooner, if there is no shifter dependency
! (define_bypass 1 "11_alu_op" "11_alu_op" "arm_no_early_alu_shift_dep")
! (define_bypass 1 "11_alu_op"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
--- 73,121 ----
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modelled here.
+ ;; ALU operations with no shifted operand
(define_insn_reservation "11_alu_op" 2
(and (eq_attr "tune" "arm1136js,arm1136jfs")
! (eq_attr "type" "alu"))
"e_1,e_2,e_3,e_wb")
+ ;; ALU operations with a shift-by-constant operand
+ (define_insn_reservation "11_alu_shift_op" 2
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "alu_shift"))
+ "e_1,e_2,e_3,e_wb")
+
+ ;; ALU operations with a shift-by-register operand
+ ;; These really stall in the decoder, in order to read
+ ;; the shift value in a second cycle. Pretend we take two cycles in
+ ;; the shift stage.
+ (define_insn_reservation "11_alu_shift_reg_op" 3
+ (and (eq_attr "tune" "arm1136js,arm1136jfs")
+ (eq_attr "type" "alu_shift_reg"))
+ "e_1*2,e_2,e_3,e_wb")
+
;; alu_ops can start sooner, if there is no shifter dependency
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! "11_alu_op")
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! "11_alu_shift_op"
! "arm_no_early_alu_shift_value_dep")
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! "11_alu_shift_reg_op"
! "arm_no_early_alu_shift_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
! "11_alu_op")
! (define_bypass 2 "11_alu_shift_reg_op"
! "11_alu_shift_op"
! "arm_no_early_alu_shift_value_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
! "11_alu_shift_reg_op"
! "arm_no_early_alu_shift_dep")
!
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
! "arm_no_early_mul_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
***************
*** 107,115 ****
(define_bypass 3 "11_mult1,11_mult2"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 3 "11_mult1,11_mult2" "11_alu_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult1,11_mult2" "11_store1,11_store2,11_store34"
"arm_no_early_store_addr_dep")
;; Signed and unsigned multiply long results are available across two cycles;
--- 142,157 ----
(define_bypass 3 "11_mult1,11_mult2"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 3 "11_mult1,11_mult2"
! "11_alu_op")
! (define_bypass 3 "11_mult1,11_mult2"
! "11_alu_shift_op"
! "arm_no_early_alu_shift_value_dep")
! (define_bypass 3 "11_mult1,11_mult2"
! "11_alu_shift_reg_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult1,11_mult2"
! "11_store1"
"arm_no_early_store_addr_dep")
;; Signed and unsigned multiply long results are available across two cycles;
***************
*** 131,139 ****
(define_bypass 4 "11_mult3,11_mult4"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 4 "11_mult3,11_mult4" "11_alu_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 4 "11_mult3,11_mult4" "11_store1,11_store2,11_store34"
"arm_no_early_store_addr_dep")
;; Various 16x16->32 multiplies and multiply-accumulates, using combinations
--- 173,188 ----
(define_bypass 4 "11_mult3,11_mult4"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 4 "11_mult3,11_mult4"
! "11_alu_op")
! (define_bypass 4 "11_mult3,11_mult4"
! "11_alu_shift_op"
! "arm_no_early_alu_shift_value_dep")
! (define_bypass 4 "11_mult3,11_mult4"
! "11_alu_shift_reg_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 4 "11_mult3,11_mult4"
! "11_store1"
"arm_no_early_store_addr_dep")
;; Various 16x16->32 multiplies and multiply-accumulates, using combinations
***************
*** 148,159 ****
(define_bypass 2 "11_mult5"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 2 "11_mult5" "11_alu_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 2 "11_mult5" "11_store1,11_store2,11_store34"
"arm_no_early_store_addr_dep")
-
;; The same idea, then the 32-bit result is added to a 64-bit quantity.
(define_insn_reservation "11_mult6" 4
(and (eq_attr "tune" "arm1136js,arm1136jfs")
--- 197,214 ----
(define_bypass 2 "11_mult5"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 2 "11_mult5"
! "11_alu_op")
! (define_bypass 2 "11_mult5"
! "11_alu_shift_op"
! "arm_no_early_alu_shift_value_dep")
! (define_bypass 2 "11_mult5"
! "11_alu_shift_reg_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 2 "11_mult5"
! "11_store1"
"arm_no_early_store_addr_dep")
;; The same idea, then the 32-bit result is added to a 64-bit quantity.
(define_insn_reservation "11_mult6" 4
(and (eq_attr "tune" "arm1136js,arm1136jfs")
***************
*** 170,178 ****
(define_bypass 3 "11_mult6,11_mult7"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 3 "11_mult6,11_mult7" "11_alu_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult6,11_mult7" "11_store1,11_store2,11_store34"
"arm_no_early_store_addr_dep")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--- 225,240 ----
(define_bypass 3 "11_mult6,11_mult7"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
! (define_bypass 3 "11_mult6,11_mult7"
! "11_alu_op")
! (define_bypass 3 "11_mult6,11_mult7"
! "11_alu_shift_op"
! "arm_no_early_alu_shift_value_dep")
! (define_bypass 3 "11_mult6,11_mult7"
! "11_alu_shift_reg_op"
"arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult6,11_mult7"
! "11_store1"
"arm_no_early_store_addr_dep")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
***************
*** 201,209 ****
;; Branches are predicted. A correctly predicted branch will be no
;; cost, but we're conservative here, and use the timings a
;; late-register would give us.
! (define_bypass 1 "11_alu_op" "11_branches")
! (define_bypass 2 "11_load1,11_load2" "11_branches")
! (define_bypass 3 "11_load34" "11_branches")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
--- 263,276 ----
;; Branches are predicted. A correctly predicted branch will be no
;; cost, but we're conservative here, and use the timings a
;; late-register would give us.
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! "11_branches")
! (define_bypass 2 "11_alu_shift_reg_op"
! "11_branches")
! (define_bypass 2 "11_load1,11_load2"
! "11_branches")
! (define_bypass 3 "11_load34"
! "11_branches")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
***************
*** 253,281 ****
;; A store can start immediately after an alu op, if that alu op does
;; not provide part of the address to access.
! (define_bypass 1 "11_alu_op" "11_store1,11_store2,11_store34"
"arm_no_early_store_addr_dep")
;; An alu op can start sooner after a load, if that alu op does not
;; have an early register dependancy on the load
! (define_bypass 2 "11_load1,11_load2" "11_alu_op"
! "arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_load34" "11_alu_op"
"arm_no_early_alu_shift_dep")
;; A mul op can start sooner after a load, if that mul op does not
;; have an early multipl dependency
! (define_bypass 2 "11_load1,11_load2"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
(define_bypass 3 "11_load34"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
-
;; A store can start sooner after a load, if that load does not
;; produce part of the address to access
! (define_bypass 2 "11_load1,11_load2" "11_store1,11_store2,11_store34"
! "arm_no_early_store_addr_dep")
! (define_bypass 3 "11_load34" "11_store1,11_store2,11_store34"
"arm_no_early_store_addr_dep")
--- 320,354 ----
;; A store can start immediately after an alu op, if that alu op does
;; not provide part of the address to access.
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! "11_store1"
! "arm_no_early_store_addr_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
! "11_store1"
"arm_no_early_store_addr_dep")
;; An alu op can start sooner after a load, if that alu op does not
;; have an early register dependancy on the load
! (define_bypass 2 "11_load1"
! "11_alu_op")
! (define_bypass 2 "11_load1"
! "11_alu_shift_op"
! "arm_no_early_alu_shift_value_dep")
! (define_bypass 2 "11_load1"
! "11_alu_shift_reg_op"
"arm_no_early_alu_shift_dep")
;; A mul op can start sooner after a load, if that mul op does not
;; have an early multipl dependency
! (define_bypass 2 "11_load1"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
(define_bypass 3 "11_load34"
"11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
"arm_no_early_mul_dep")
;; A store can start sooner after a load, if that load does not
;; produce part of the address to access
! (define_bypass 2 "11_load1"
! "11_store1"
"arm_no_early_store_addr_dep")