This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[csl-arm-branch]: Better bypasses for 1026 and 1136 processors


Richard,
this patch implements the alu, alu_shift and alu_shift_reg instruction type
attributes we discussed. All seems to be good with this. I also realized
that I had not tested the previous patch properly. I have tested this one
properly, which is why the scheduler predicates are now more robust.

Built and tested with an arm-unknown-linux cross compiler.

nathan
--
Nathan Sidwell    ::   http://www.codesourcery.com   ::     CodeSourcery LLC
         The voices in my head said this was stupid too
nathan@codesourcery.com    ::     http://www.planetfall.pwp.blueyonder.co.uk

2003-12-16  Nathan Sidwell  <nathan@codesourcery.com>

	* config/arm/arm-protos.h (arm_no_early_alu_shift_value_dep): Declare.
	* config/arm/arm.c (arm_adjust_cost): Check shift cost for
	TYPE_ALU_SHIFT and TYPE_ALU_SHIFT_REG.
	(arm_no_early_store_addr_dep, arm_no_early_alu_shift_dep,
	arm_no_early_mul_dep): Correctly deal with conditional execution,
	parallels and single shift operations.
	(arm_no_early_alu_shift_value_dep): Define.
	* config/arm/arm.md (attr type): Replace 'normal' with 'alu',
	'alu_shift' and 'alu_shift_reg'.
	(attr core_cycles): Adjust.
	(*addsi3_carryin_shift, andsi_not_shiftsi_si, *arm_shiftsi3,
	*shiftsi3_compare0, *notsi_shiftsi, *notsi_shiftsi_compare0,
	*not_shiftsi_compare0_scratch, *cmpsi_shiftsi, *cmpsi_shiftsi_swp,
	*cmpsi_neg_shiftsi, *arith_shiftsi, *arith_shiftsi_compare0,
	*arith_shiftsi_compare0_scratch, *sub_shiftsi,
	*sub_shiftsi_compare0, *sub_shiftsi_compare0_scratch,
	*if_shift_move, *if_move_shift, *if_shift_shift): Set type
	attribute appropriately.
	* config/arm/arm1026ejs.md (alu_op): Adjust.
	(alu_shift_op, alu_shift_reg_op): New.
	* config/arm/arm1136jfs.md: Add better bypasses for early
	registers. Remove load[234] and store[234] bypasses.
	(11_alu_op): Adjust.
	(11_alu_shift_op, 11_alu_shift_reg_op): New.

Index: config/arm/arm-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm-protos.h,v
retrieving revision 1.60.4.1
diff -c -3 -p -r1.60.4.1 arm-protos.h
*** config/arm/arm-protos.h	15 Dec 2003 14:36:23 -0000	1.60.4.1
--- config/arm/arm-protos.h	16 Dec 2003 19:08:47 -0000
*************** extern int cirrus_shift_const (rtx, enum
*** 97,102 ****
--- 97,103 ----
  extern int cirrus_memory_offset (rtx);
  extern int arm_no_early_store_addr_dep (rtx, rtx);
  extern int arm_no_early_alu_shift_dep (rtx, rtx);
+ extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
  extern int arm_no_early_mul_dep (rtx, rtx);
  
  extern int symbol_mentioned_p (rtx);
Index: config/arm/arm.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.303.2.5
diff -c -3 -p -r1.303.2.5 arm.c
*** config/arm/arm.c	15 Dec 2003 14:36:26 -0000	1.303.2.5
--- config/arm/arm.c	16 Dec 2003 19:09:18 -0000
*************** arm_adjust_cost (rtx insn, rtx link, rtx
*** 3589,3595 ****
  	 operand for INSN.  If we have a shifted input operand and the
  	 instruction we depend on is another ALU instruction, then we may
  	 have to account for an additional stall.  */
!       if (shift_opnum != 0 && attr_type == TYPE_NORMAL)
  	{
  	  rtx shifted_operand;
  	  int opno;
--- 3589,3596 ----
  	 operand for INSN.  If we have a shifted input operand and the
  	 instruction we depend on is another ALU instruction, then we may
  	 have to account for an additional stall.  */
!       if (shift_opnum != 0
! 	  && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
  	{
  	  rtx shifted_operand;
  	  int opno;
*************** arm_output_load_gr (operands)
*** 13225,13248 ****
  int
  arm_no_early_store_addr_dep (rtx producer, rtx consumer)
  {
!   rtx value = XEXP (PATTERN (producer), 0);
!   rtx addr = XEXP (PATTERN (consumer), 0);
  
    return !reg_overlap_mentioned_p (value, addr);
  }
  
  /* Return non-zero if the CONSUMER instruction (an ALU op) does not
!    have an early register shift dependency on the result of
!    PRODUCER.  */
  
  int
  arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
  {
!   rtx value = XEXP (PATTERN (producer), 0);
!   rtx early_op = XEXP (XEXP (PATTERN (consumer), 1), 0);
  
!   return (GET_CODE (early_op) != MULT
! 	  || !reg_overlap_mentioned_p (value, early_op));
  }
  
  /* Return non-zero if the CONSUMER (a mul or mac op) does not
--- 13226,13311 ----
  int
  arm_no_early_store_addr_dep (rtx producer, rtx consumer)
  {
!   rtx value = PATTERN (producer);
!   rtx addr = PATTERN (consumer);
  
+   if (GET_CODE (value) == COND_EXEC)
+     value = XEXP (value, 1);
+   if (GET_CODE (value) == PARALLEL)
+     value = XVECEXP (value, 0, 0);
+   value = XEXP (value, 0);
+   if (GET_CODE (addr) == COND_EXEC)
+     addr = XEXP (addr, 1);
+   if (GET_CODE (addr) == PARALLEL)
+     addr = XVECEXP (addr, 0, 0);
+   addr = XEXP (addr, 0);
+   
    return !reg_overlap_mentioned_p (value, addr);
  }
  
  /* Return non-zero if the CONSUMER instruction (an ALU op) does not
!    have an early register shift value or amount dependency on the
!    result of PRODUCER.  */
  
  int
  arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
  {
!   rtx value = PATTERN (producer);
!   rtx op = PATTERN (consumer);
!   rtx early_op;
! 
!   if (GET_CODE (value) == COND_EXEC)
!     value = XEXP (value, 1);
!   if (GET_CODE (value) == PARALLEL)
!     value = XVECEXP (value, 0, 0);
!   value = XEXP (value, 0);
!   if (GET_CODE (op) == COND_EXEC)
!     op = XEXP (op, 1);
!   if (GET_CODE (op) == PARALLEL)
!     op = XVECEXP (op, 0, 0);
!   op = XEXP (op, 1);
!   
!   early_op = XEXP (op, 0);
!   /* This is either an actual independent shift, or a shift applied to
!      the first operand of another operation.  We want the whole shift
!      operation.  */
!   if (GET_CODE (early_op) == REG)
!     early_op = op;
  
!   return !reg_overlap_mentioned_p (value, early_op);
! }
! 
! /* Return non-zero if the CONSUMER instruction (an ALU op) does not
!    have an early register shift value dependency on the result of
!    PRODUCER.  */
! 
! int
! arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
! {
!   rtx value = PATTERN (producer);
!   rtx op = PATTERN (consumer);
!   rtx early_op;
! 
!   if (GET_CODE (value) == COND_EXEC)
!     value = XEXP (value, 1);
!   if (GET_CODE (value) == PARALLEL)
!     value = XVECEXP (value, 0, 0);
!   value = XEXP (value, 0);
!   if (GET_CODE (op) == COND_EXEC)
!     op = XEXP (op, 1);
!   if (GET_CODE (op) == PARALLEL)
!     op = XVECEXP (op, 0, 0);
!   op = XEXP (op, 1);
!   
!   early_op = XEXP (op, 0);
! 
!   /* This is either an actual independent shift, or a shift applied to
!      the first operand of another operation.  We want the value being
!      shifted, in either case.  */
!   if (GET_CODE (early_op) != REG)
!     early_op = XEXP (early_op, 0);
!   
!   return !reg_overlap_mentioned_p (value, early_op);
  }
  
  /* Return non-zero if the CONSUMER (a mul or mac op) does not
*************** arm_no_early_alu_shift_dep (rtx producer
*** 13252,13261 ****
  int
  arm_no_early_mul_dep (rtx producer, rtx consumer)
  {
!   rtx value = XEXP (PATTERN (producer), 0);
!   rtx early_ops = XEXP (PATTERN (consumer), 1);
  
!   return (GET_CODE (early_ops) == PLUS
! 	  && !reg_overlap_mentioned_p (value, XEXP (early_ops, 0)));
  }
  
--- 13315,13335 ----
  int
  arm_no_early_mul_dep (rtx producer, rtx consumer)
  {
!   rtx value = PATTERN (producer);
!   rtx op = PATTERN (consumer);
  
!   if (GET_CODE (value) == COND_EXEC)
!     value = XEXP (value, 1);
!   if (GET_CODE (value) == PARALLEL)
!     value = XVECEXP (value, 0, 0);
!   value = XEXP (value, 0);
!   if (GET_CODE (op) == COND_EXEC)
!     op = XEXP (op, 1);
!   if (GET_CODE (op) == PARALLEL)
!     op = XVECEXP (op, 0, 0);
!   op = XEXP (op, 1);
!   
!   return (GET_CODE (op) == PLUS
! 	  && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
  }
  
Index: config/arm/arm.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.md,v
retrieving revision 1.145.2.4
diff -c -3 -p -r1.145.2.4 arm.md
*** config/arm/arm.md	10 Dec 2003 12:16:04 -0000	1.145.2.4
--- config/arm/arm.md	16 Dec 2003 19:09:41 -0000
***************
*** 214,224 ****
  ; mav_dmult	Double multiplies (7 cycle)
  ;
  (define_attr "type"
! 	"normal,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,float_em,f_load,f_store,f_mem_r,r_mem_f,f_2_r,r_2_f,branch,call,load,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" 
  	(if_then_else 
  	 (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
  	 (const_string "mult")
! 	 (const_string "normal")))
  
  ; Load scheduling, set from the arm_ld_sched variable
  ; initialized by arm_override_options() 
--- 214,224 ----
  ; mav_dmult	Double multiplies (7 cycle)
  ;
  (define_attr "type"
! 	"alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,float_em,f_load,f_store,f_mem_r,r_mem_f,f_2_r,r_2_f,branch,call,load,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" 
  	(if_then_else 
  	 (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
  	 (const_string "mult")
! 	 (const_string "alu")))
  
  ; Load scheduling, set from the arm_ld_sched variable
  ; initialized by arm_override_options() 
***************
*** 274,280 ****
  ; than one on the main cpu execution unit.
  (define_attr "core_cycles" "single,multi"
    (if_then_else (eq_attr "type"
! 		 "normal,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith")
  		(const_string "single")
  	        (const_string "multi")))
  
--- 274,280 ----
  ; than one on the main cpu execution unit.
  (define_attr "core_cycles" "single,multi"
    (if_then_else (eq_attr "type"
! 		 "alu,alu_shift,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith")
  		(const_string "single")
  	        (const_string "multi")))
  
***************
*** 727,733 ****
  		    (match_operand:SI 1 "s_register_operand" ""))))]
    "TARGET_ARM"
    "adc%?\\t%0, %1, %3%S2"
!   [(set_attr "conds" "use")]
  )
  
  (define_insn "*addsi3_carryin_alt1"
--- 727,736 ----
  		    (match_operand:SI 1 "s_register_operand" ""))))]
    "TARGET_ARM"
    "adc%?\\t%0, %1, %3%S2"
!   [(set_attr "conds" "use")
!    (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*addsi3_carryin_alt1"
***************
*** 1925,1931 ****
    "bic%?\\t%0, %1, %2%S4"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "2")
!    ]
  )
  
  (define_insn "*andsi_notsi_si_compare0"
--- 1928,1936 ----
    "bic%?\\t%0, %1, %2%S4"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "2")
!    (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*andsi_notsi_si_compare0"
***************
*** 2531,2537 ****
    "mov%?\\t%0, %1%S3"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "1")
!    ]
  )
  
  (define_insn "*shiftsi3_compare0"
--- 2536,2544 ----
    "mov%?\\t%0, %1%S3"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*shiftsi3_compare0"
***************
*** 2546,2552 ****
    "mov%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    ]
  )
  
  (define_insn "*shiftsi3_compare0_scratch"
--- 2553,2561 ----
    "mov%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*shiftsi3_compare0_scratch"
***************
*** 2559,2566 ****
    "TARGET_ARM"
    "mov%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
!    (set_attr "shift" "1")
!    ]
  )
  
  (define_insn "*notsi_shiftsi"
--- 2568,2574 ----
    "TARGET_ARM"
    "mov%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
!    (set_attr "shift" "1")]
  )
  
  (define_insn "*notsi_shiftsi"
***************
*** 2572,2578 ****
    "mvn%?\\t%0, %1%S3"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "1")
!    ]
  )
  
  (define_insn "*notsi_shiftsi_compare0"
--- 2580,2588 ----
    "mvn%?\\t%0, %1%S3"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*notsi_shiftsi_compare0"
***************
*** 2587,2593 ****
    "mvn%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    ]
  )
  
  (define_insn "*not_shiftsi_compare0_scratch"
--- 2597,2605 ----
    "mvn%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*not_shiftsi_compare0_scratch"
***************
*** 2601,2607 ****
    "mvn%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!   ]
  )
  
  ;; We don't really have extzv, but defining this using shifts helps
--- 2613,2621 ----
    "mvn%?s\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  ;; We don't really have extzv, but defining this using shifts helps
***************
*** 6305,6311 ****
    "cmp%?\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    ]
  )
  
  (define_insn "*cmpsi_shiftsi_swp"
--- 6319,6327 ----
    "cmp%?\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*cmpsi_shiftsi_swp"
***************
*** 6318,6324 ****
    "cmp%?\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    ]
  )
  
  (define_insn "*cmpsi_neg_shiftsi"
--- 6334,6342 ----
    "cmp%?\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*cmpsi_neg_shiftsi"
***************
*** 6331,6337 ****
    "cmn%?\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    ]
  )
  
  ;; Cirrus SF compare instruction
--- 6349,6357 ----
    "cmn%?\\t%0, %1%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "1")
!    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  ;; Cirrus SF compare instruction
***************
*** 7490,7496 ****
    "%i1%?\\t%0, %2, %4%S3"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "4")
!    ]
  )
  
  (define_insn "*arith_shiftsi_compare0"
--- 7510,7518 ----
    "%i1%?\\t%0, %2, %4%S3"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "4")
!    (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*arith_shiftsi_compare0"
***************
*** 7508,7514 ****
    "%i1%?s\\t%0, %2, %4%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "4")
!    ]
  )
  
  (define_insn "*arith_shiftsi_compare0_scratch"
--- 7530,7538 ----
    "%i1%?s\\t%0, %2, %4%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "4")
!    (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*arith_shiftsi_compare0_scratch"
***************
*** 7524,7530 ****
    "%i1%?s\\t%0, %2, %4%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "4")
!    ]
  )
  
  (define_insn "*sub_shiftsi"
--- 7548,7556 ----
    "%i1%?s\\t%0, %2, %4%S3"
    [(set_attr "conds" "set")
     (set_attr "shift" "4")
!    (set (attr "type") (if_then_else (match_operand 5 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*sub_shiftsi"
***************
*** 7537,7543 ****
    "sub%?\\t%0, %1, %3%S2"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "3")
!    ]
  )
  
  (define_insn "*sub_shiftsi_compare0"
--- 7563,7571 ----
    "sub%?\\t%0, %1, %3%S2"
    [(set_attr "predicable" "yes")
     (set_attr "shift" "3")
!    (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*sub_shiftsi_compare0"
***************
*** 7554,7561 ****
    "TARGET_ARM"
    "sub%?s\\t%0, %1, %3%S2"
    [(set_attr "conds" "set")
!    (set_attr "shift" "3") 
!    ]
  )
  
  (define_insn "*sub_shiftsi_compare0_scratch"
--- 7582,7591 ----
    "TARGET_ARM"
    "sub%?s\\t%0, %1, %3%S2"
    [(set_attr "conds" "set")
!    (set_attr "shift" "3")
!    (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*sub_shiftsi_compare0_scratch"
***************
*** 7570,7577 ****
    "TARGET_ARM"
    "sub%?s\\t%0, %1, %3%S2"
    [(set_attr "conds" "set")
!    (set_attr "shift" "3") 
!    ]
  )
  
  
--- 7600,7609 ----
    "TARGET_ARM"
    "sub%?s\\t%0, %1, %3%S2"
    [(set_attr "conds" "set")
!    (set_attr "shift" "3")
!    (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  
***************
*** 8406,8412 ****
     mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
    [(set_attr "conds" "use")
     (set_attr "shift" "2")
!    (set_attr "length" "4,8,8")]
  )
  
  (define_insn "*ifcompare_move_shift"
--- 8438,8447 ----
     mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
    [(set_attr "conds" "use")
     (set_attr "shift" "2")
!    (set_attr "length" "4,8,8")
!    (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*ifcompare_move_shift"
***************
*** 8442,8448 ****
     mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
    [(set_attr "conds" "use")
     (set_attr "shift" "2")
!    (set_attr "length" "4,8,8")]
  )
  
  (define_insn "*ifcompare_shift_shift"
--- 8477,8486 ----
     mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
    [(set_attr "conds" "use")
     (set_attr "shift" "2")
!    (set_attr "length" "4,8,8")
!    (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "")
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*ifcompare_shift_shift"
***************
*** 8479,8485 ****
    "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
    [(set_attr "conds" "use")
     (set_attr "shift" "1")
!    (set_attr "length" "8")]
  )
  
  (define_insn "*ifcompare_not_arith"
--- 8517,8528 ----
    "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
    [(set_attr "conds" "use")
     (set_attr "shift" "1")
!    (set_attr "length" "8")
!    (set (attr "type") (if_then_else
! 		        (and (match_operand 2 "const_int_operand" "")
!                              (match_operand 4 "const_int_operand" ""))
! 		      (const_string "alu_shift")
! 		      (const_string "alu_shift_reg")))]
  )
  
  (define_insn "*ifcompare_not_arith"
Index: config/arm/arm1026ejs.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/Attic/arm1026ejs.md,v
retrieving revision 1.1.2.3
diff -c -3 -p -r1.1.2.3 arm1026ejs.md
*** config/arm/arm1026ejs.md	15 Dec 2003 14:36:29 -0000	1.1.2.3
--- config/arm/arm1026ejs.md	16 Dec 2003 19:09:41 -0000
***************
*** 64,73 ****
  ;; If the destination register is the PC, the pipelines are stalled
  ;; for several cycles.  That case is not modeled here.
  
  (define_insn_reservation "alu_op" 1 
   (and (eq_attr "tune" "arm1026ejs")
!       (eq_attr "type" "normal"))
   "a_e,a_m,a_w")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; Multiplication Instructions
--- 64,89 ----
  ;; If the destination register is the PC, the pipelines are stalled
  ;; for several cycles.  That case is not modeled here.
  
+ ;; ALU operations with no shifted operand
  (define_insn_reservation "alu_op" 1 
   (and (eq_attr "tune" "arm1026ejs")
!       (eq_attr "type" "alu"))
   "a_e,a_m,a_w")
+ 
+ ;; ALU operations with a shift-by-constant operand
+ (define_insn_reservation "alu_shift_op" 1 
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "alu_shift"))
+  "a_e,a_m,a_w")
+ 
+ ;; ALU operations with a shift-by-register operand
+ ;; These really stall in the decoder, in order to read
+ ;; the shift value in a second cycle. Pretend we take two cycles in
+ ;; the execute stage.
+ (define_insn_reservation "alu_shift_reg_op" 2 
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "alu_shift_reg"))
+  "a_e*2,a_m,a_w")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; Multiplication Instructions
Index: config/arm/arm1136jfs.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/Attic/arm1136jfs.md,v
retrieving revision 1.1.2.2
diff -c -3 -p -r1.1.2.2 arm1136jfs.md
*** config/arm/arm1136jfs.md	15 Dec 2003 14:36:29 -0000	1.1.2.2
--- config/arm/arm1136jfs.md	16 Dec 2003 19:09:42 -0000
***************
*** 73,86 ****
  ;; If the destination register is the PC, the pipelines are stalled
  ;; for several cycles.  That case is not modelled here.
  
  (define_insn_reservation "11_alu_op" 2
   (and (eq_attr "tune" "arm1136js,arm1136jfs")
!       (eq_attr "type" "normal"))
   "e_1,e_2,e_3,e_wb")
  
  ;; alu_ops can start sooner, if there is no shifter dependency
! (define_bypass 1 "11_alu_op" "11_alu_op" "arm_no_early_alu_shift_dep")
! (define_bypass 1 "11_alu_op"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
  
--- 73,121 ----
  ;; If the destination register is the PC, the pipelines are stalled
  ;; for several cycles.  That case is not modelled here.
  
+ ;; ALU operations with no shifted operand
  (define_insn_reservation "11_alu_op" 2
   (and (eq_attr "tune" "arm1136js,arm1136jfs")
!       (eq_attr "type" "alu"))
   "e_1,e_2,e_3,e_wb")
  
+ ;; ALU operations with a shift-by-constant operand
+ (define_insn_reservation "11_alu_shift_op" 2
+  (and (eq_attr "tune" "arm1136js,arm1136jfs")
+       (eq_attr "type" "alu_shift"))
+  "e_1,e_2,e_3,e_wb")
+ 
+ ;; ALU operations with a shift-by-register operand
+ ;; These really stall in the decoder, in order to read
+ ;; the shift value in a second cycle. Pretend we take two cycles in
+ ;; the shift stage.
+ (define_insn_reservation "11_alu_shift_reg_op" 3
+  (and (eq_attr "tune" "arm1136js,arm1136jfs")
+       (eq_attr "type" "alu_shift_reg"))
+  "e_1*2,e_2,e_3,e_wb")
+ 
  ;; alu_ops can start sooner, if there is no shifter dependency
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! 	       "11_alu_op")
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! 	       "11_alu_shift_op"
! 	       "arm_no_early_alu_shift_value_dep")
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! 	       "11_alu_shift_reg_op"
! 	       "arm_no_early_alu_shift_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
! 	       "11_alu_op")
! (define_bypass 2 "11_alu_shift_reg_op"
! 	       "11_alu_shift_op"
! 	       "arm_no_early_alu_shift_value_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
! 	       "11_alu_shift_reg_op"
! 	       "arm_no_early_alu_shift_dep")
! 
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! 	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
! 	       "arm_no_early_mul_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
  
***************
*** 107,115 ****
  (define_bypass 3 "11_mult1,11_mult2"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 3 "11_mult1,11_mult2" "11_alu_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult1,11_mult2" "11_store1,11_store2,11_store34"
  	       "arm_no_early_store_addr_dep")
  
  ;; Signed and unsigned multiply long results are available across two cycles;
--- 142,157 ----
  (define_bypass 3 "11_mult1,11_mult2"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 3 "11_mult1,11_mult2"
! 	       "11_alu_op")
! (define_bypass 3 "11_mult1,11_mult2"
! 	       "11_alu_shift_op"
! 	       "arm_no_early_alu_shift_value_dep")
! (define_bypass 3 "11_mult1,11_mult2"
! 	       "11_alu_shift_reg_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult1,11_mult2"
! 	       "11_store1"
  	       "arm_no_early_store_addr_dep")
  
  ;; Signed and unsigned multiply long results are available across two cycles;
***************
*** 131,139 ****
  (define_bypass 4 "11_mult3,11_mult4"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 4 "11_mult3,11_mult4" "11_alu_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 4 "11_mult3,11_mult4" "11_store1,11_store2,11_store34"
  	       "arm_no_early_store_addr_dep")
  
  ;; Various 16x16->32 multiplies and multiply-accumulates, using combinations
--- 173,188 ----
  (define_bypass 4 "11_mult3,11_mult4"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 4 "11_mult3,11_mult4"
! 	       "11_alu_op")
! (define_bypass 4 "11_mult3,11_mult4"
! 	       "11_alu_shift_op"
! 	       "arm_no_early_alu_shift_value_dep")
! (define_bypass 4 "11_mult3,11_mult4"
! 	       "11_alu_shift_reg_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 4 "11_mult3,11_mult4"
! 	       "11_store1"
  	       "arm_no_early_store_addr_dep")
  
  ;; Various 16x16->32 multiplies and multiply-accumulates, using combinations
***************
*** 148,159 ****
  (define_bypass 2 "11_mult5"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 2 "11_mult5" "11_alu_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 2 "11_mult5" "11_store1,11_store2,11_store34"
  	       "arm_no_early_store_addr_dep")
  
- 
  ;; The same idea, then the 32-bit result is added to a 64-bit quantity.
  (define_insn_reservation "11_mult6" 4
   (and (eq_attr "tune" "arm1136js,arm1136jfs")
--- 197,214 ----
  (define_bypass 2 "11_mult5"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 2 "11_mult5"
! 	       "11_alu_op")
! (define_bypass 2 "11_mult5"
! 	       "11_alu_shift_op"
! 	       "arm_no_early_alu_shift_value_dep")
! (define_bypass 2 "11_mult5"
! 	       "11_alu_shift_reg_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 2 "11_mult5"
! 	       "11_store1"
  	       "arm_no_early_store_addr_dep")
  
  ;; The same idea, then the 32-bit result is added to a 64-bit quantity.
  (define_insn_reservation "11_mult6" 4
   (and (eq_attr "tune" "arm1136js,arm1136jfs")
***************
*** 170,178 ****
  (define_bypass 3 "11_mult6,11_mult7"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 3 "11_mult6,11_mult7" "11_alu_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult6,11_mult7" "11_store1,11_store2,11_store34"
  	       "arm_no_early_store_addr_dep")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--- 225,240 ----
  (define_bypass 3 "11_mult6,11_mult7"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
! (define_bypass 3 "11_mult6,11_mult7"
! 	       "11_alu_op")
! (define_bypass 3 "11_mult6,11_mult7"
! 	       "11_alu_shift_op"
! 	       "arm_no_early_alu_shift_value_dep")
! (define_bypass 3 "11_mult6,11_mult7"
! 	       "11_alu_shift_reg_op"
  	       "arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_mult6,11_mult7"
! 	       "11_store1"
  	       "arm_no_early_store_addr_dep")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
***************
*** 201,209 ****
  ;; Branches are predicted. A correctly predicted branch will be no
  ;; cost, but we're conservative here, and use the timings a
  ;; late-register would give us.
! (define_bypass 1 "11_alu_op" "11_branches")
! (define_bypass 2 "11_load1,11_load2" "11_branches")
! (define_bypass 3 "11_load34" "11_branches")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; Load/Store Instructions
--- 263,276 ----
  ;; Branches are predicted. A correctly predicted branch will be no
  ;; cost, but we're conservative here, and use the timings a
  ;; late-register would give us.
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! 	       "11_branches")
! (define_bypass 2 "11_alu_shift_reg_op"
! 	       "11_branches")
! (define_bypass 2 "11_load1,11_load2"
! 	       "11_branches")
! (define_bypass 3 "11_load34"
! 	       "11_branches")
  
  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; Load/Store Instructions
***************
*** 253,281 ****
  
  ;; A store can start immediately after an alu op, if that alu op does
  ;; not provide part of the address to access.
! (define_bypass 1 "11_alu_op" "11_store1,11_store2,11_store34"
  	       "arm_no_early_store_addr_dep")
  
  ;; An alu op can start sooner after a load, if that alu op does not
  ;; have an early register dependancy on the load
! (define_bypass 2 "11_load1,11_load2" "11_alu_op"
! 	       "arm_no_early_alu_shift_dep")
! (define_bypass 3 "11_load34" "11_alu_op"
  	       "arm_no_early_alu_shift_dep")
  
  ;; A mul op can start sooner after a load, if that mul op does not
  ;; have an early multipl dependency
! (define_bypass 2 "11_load1,11_load2"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
  (define_bypass 3 "11_load34"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
  
- 
  ;; A store can start sooner after a load, if that load does not
  ;; produce part of the address to access
! (define_bypass 2 "11_load1,11_load2" "11_store1,11_store2,11_store34"
! 	       "arm_no_early_store_addr_dep")
! (define_bypass 3 "11_load34" "11_store1,11_store2,11_store34"
  	       "arm_no_early_store_addr_dep")
--- 320,354 ----
  
  ;; A store can start immediately after an alu op, if that alu op does
  ;; not provide part of the address to access.
! (define_bypass 1 "11_alu_op,11_alu_shift_op"
! 	       "11_store1"
! 	       "arm_no_early_store_addr_dep")
! (define_bypass 2 "11_alu_shift_reg_op"
! 	       "11_store1"
  	       "arm_no_early_store_addr_dep")
  
  ;; An alu op can start sooner after a load, if that alu op does not
  ;; have an early register dependancy on the load
! (define_bypass 2 "11_load1"
! 	       "11_alu_op")
! (define_bypass 2 "11_load1"
! 	       "11_alu_shift_op"
! 	       "arm_no_early_alu_shift_value_dep")
! (define_bypass 2 "11_load1"
! 	       "11_alu_shift_reg_op"
  	       "arm_no_early_alu_shift_dep")
  
  ;; A mul op can start sooner after a load, if that mul op does not
  ;; have an early multipl dependency
! (define_bypass 2 "11_load1"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
  (define_bypass 3 "11_load34"
  	       "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7"
  	       "arm_no_early_mul_dep")
  
  ;; A store can start sooner after a load, if that load does not
  ;; produce part of the address to access
! (define_bypass 2 "11_load1"
! 	       "11_store1"
  	       "arm_no_early_store_addr_dep")

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]