x86_64 merger part 28 - peepholes

Jan Hubicka jh@suse.cz
Sun Mar 25 05:11:00 GMT 2001


Hi,
Just a few changes on the i386.md side remain...
This patch updates the peepholes.

Honza

Ne bře 25 15:14:20 CEST 2001  Jan Hubicka  <jh@suse.cz>

	* i386.md (push mem DI peep2): New.
	(mov 0, mov -1 peep2): Handle 64bit.
	(lea to arithmetics peep2): Handle 64bit leas.
	(rsp arithmetics to push/pop peep2s): New.

*** i386.md	Thu Mar 22 21:45:30 2001
--- /p1/new/x86-64/gcc/gcc/config/i386/i386.md	Sun Mar 25 01:07:27 2001
***************
*** 13796,13801 ****
--- 16175,16189 ----
     (set (match_dup 0) (match_dup 2))]
    "")
  
+ (define_peephole2
+   [(set (match_operand:DI 0 "push_operand" "")
+ 	(match_operand:DI 1 "memory_operand" ""))
+    (match_scratch:DI 2 "r")]
+   "! optimize_size && ! TARGET_PUSH_MEMORY"
+   [(set (match_dup 2) (match_dup 1))
+    (set (match_dup 0) (match_dup 2))]
+   "")
+ 
  ;; We need to handle SFmode only, because DFmode and XFmode is split to
  ;; SImode pushes.
  (define_peephole2
***************
*** 14115,14126 ****
  	(const_int 0))]
    "(GET_MODE (operands[0]) == QImode
      || GET_MODE (operands[0]) == HImode
!     || GET_MODE (operands[0]) == SImode)
     && (! TARGET_USE_MOV0 || optimize_size)
     && peep2_regno_dead_p (0, FLAGS_REG)"
    [(parallel [(set (match_dup 0) (const_int 0))
  	      (clobber (reg:CC 17))])]
!   "operands[0] = gen_rtx_REG (SImode, true_regnum (operands[0]));")
  
  (define_peephole2
    [(set (strict_low_part (match_operand 0 "register_operand" ""))
--- 16503,16516 ----
  	(const_int 0))]
    "(GET_MODE (operands[0]) == QImode
      || GET_MODE (operands[0]) == HImode
!     || GET_MODE (operands[0]) == SImode
!     || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
     && (! TARGET_USE_MOV0 || optimize_size)
     && peep2_regno_dead_p (0, FLAGS_REG)"
    [(parallel [(set (match_dup 0) (const_int 0))
  	      (clobber (reg:CC 17))])]
!   "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode,
! 			      true_regnum (operands[0]));")
  
  (define_peephole2
    [(set (strict_low_part (match_operand 0 "register_operand" ""))
***************
*** 14137,14148 ****
    [(set (match_operand 0 "register_operand" "")
  	(const_int -1))]
    "(GET_MODE (operands[0]) == HImode
!     || GET_MODE (operands[0]) == SImode)
     && (optimize_size || TARGET_PENTIUM)
     && peep2_regno_dead_p (0, FLAGS_REG)"
    [(parallel [(set (match_dup 0) (const_int -1))
  	      (clobber (reg:CC 17))])]
!   "operands[0] = gen_rtx_REG (SImode, true_regnum (operands[0]));")
  
  ;; Attempt to convert simple leas to adds. These can be created by
  ;; move expanders.
--- 16527,16540 ----
    [(set (match_operand 0 "register_operand" "")
  	(const_int -1))]
    "(GET_MODE (operands[0]) == HImode
!     || GET_MODE (operands[0]) == SImode 
!     || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
     && (optimize_size || TARGET_PENTIUM)
     && peep2_regno_dead_p (0, FLAGS_REG)"
    [(parallel [(set (match_dup 0) (const_int -1))
  	      (clobber (reg:CC 17))])]
!   "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode,
! 			      true_regnum (operands[0]));")
  
  ;; Attempt to convert simple leas to adds. These can be created by
  ;; move expanders.
***************
*** 14157,14170 ****
  
  (define_peephole2
    [(set (match_operand:SI 0 "register_operand" "")
    	(mult:SI (match_dup 0)
! 		 (match_operand:SI 1 "immediate_operand" "")))]
    "exact_log2 (INTVAL (operands[1])) >= 0
     && peep2_regno_dead_p (0, FLAGS_REG)"
    [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
  	      (clobber (reg:CC 17))])]
    "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
  
  ;; The ESP adjustments can be done by the push and pop instructions.  Resulting
  ;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes.  On
  ;; many CPUs it is also faster, since special hardware to avoid esp
--- 16549,16601 ----
  
  (define_peephole2
    [(set (match_operand:SI 0 "register_operand" "")
+   	(subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "")
+ 			    (match_operand:DI 2 "nonmemory_operand" "")) 0))]
+   "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])"
+   [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+ 	      (clobber (reg:CC 17))])]
+   "operands[2] = gen_lowpart (SImode, operands[2]);")
+ 
+ (define_peephole2
+   [(set (match_operand:DI 0 "register_operand" "")
+   	(plus:DI (match_dup 0)
+ 		 (match_operand:DI 1 "x86_64_general_operand" "")))]
+   "peep2_regno_dead_p (0, FLAGS_REG)"
+   [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
+ 	      (clobber (reg:CC 17))])]
+   "")
+ 
+ (define_peephole2
+   [(set (match_operand:SI 0 "register_operand" "")
    	(mult:SI (match_dup 0)
! 		 (match_operand:SI 1 "const_int_operand" "")))]
    "exact_log2 (INTVAL (operands[1])) >= 0
     && peep2_regno_dead_p (0, FLAGS_REG)"
    [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
  	      (clobber (reg:CC 17))])]
    "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
  
+ (define_peephole2
+   [(set (match_operand:DI 0 "register_operand" "")
+   	(mult:DI (match_dup 0)
+ 		 (match_operand:DI 1 "const_int_operand" "")))]
+   "exact_log2 (INTVAL (operands[1])) >= 0
+    && peep2_regno_dead_p (0, FLAGS_REG)"
+   [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
+ 	      (clobber (reg:CC 17))])]
+   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+ 
+ (define_peephole2 ;; SImode low part of (DImode reg * 2^n) -> SImode shift left by n
+   [(set (match_operand:SI 0 "register_operand" "")
+   	(subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
+ 		   (match_operand:DI 2 "const_int_operand" "")) 0))]
+   "exact_log2 (INTVAL (operands[2])) >= 0
+    && REGNO (operands[0]) == REGNO (operands[1])
+    && peep2_regno_dead_p (0, FLAGS_REG)"
+   [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+ 	      (clobber (reg:CC 17))])]
+   "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+ 
  ;; The ESP adjustments can be done by the push and pop instructions.  Resulting
  ;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes.  On
  ;; many CPUs it is also faster, since special hardware to avoid esp
***************
*** 14362,14367 ****
--- 16793,16913 ----
  	      (clobber (match_dup 0))])]
    "")
  
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8)))
+ 	      (set (reg:DI 6) (reg:DI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_4"
+   [(clobber (match_dup 0))
+    (parallel [(set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))
+ 	      (set (reg:DI 6) (reg:DI 6))])])
+ 
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16)))
+ 	      (set (reg:DI 6) (reg:DI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_8"
+   [(clobber (match_dup 0))
+    (set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))
+    (parallel [(set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))
+ 	      (set (reg:DI 6) (reg:DI 6))])])
+ 
+ ;; Convert esp subtractions to push.
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8)))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_4"
+   [(clobber (match_dup 0))
+    (set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))])
+ 
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16)))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_SUB_ESP_8"
+   [(clobber (match_dup 0))
+    (set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))
+    (set (mem:DI (pre_dec:DI (reg:DI 7))) (match_dup 0))])
+ 
+ ;; Convert epilogue deallocator to pop.
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))
+ 	      (set (reg:DI 6) (reg:DI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_ADD_ESP_4"
+   [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))
+ 	      (set (reg:DI 6) (reg:DI 6))])]
+   "")
+ 
+ ;; Two pops case is tricky, since pop causes dependency on destination register.
+ ;; We use two registers if available.
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (match_scratch:DI 1 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16)))
+ 	      (set (reg:DI 6) (reg:DI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size || !TARGET_ADD_ESP_8"
+   [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))
+ 	      (set (reg:DI 6) (reg:DI 6))])
+    (parallel [(set (match_dup 1) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])]
+   "")
+ 
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16)))
+ 	      (set (reg:DI 6) (reg:DI 6))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size"
+   [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))
+ 	      (set (reg:DI 6) (reg:DI 6))])
+    (parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])]
+   "")
+ 
+ ;; Convert esp additions to pop.
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))
+ 	      (clobber (reg:CC 17))])]
+   ""
+   [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])]
+   "")
+ 
+ ;; Two pops case is tricky, since pop causes dependency on destination register.
+ ;; We use two registers if available.
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (match_scratch:DI 1 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16)))
+ 	      (clobber (reg:CC 17))])]
+   ""
+   [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])
+    (parallel [(set (match_dup 1) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])]
+   "")
+ 
+ (define_peephole2
+   [(match_scratch:DI 0 "r")
+    (parallel [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 16)))
+ 	      (clobber (reg:CC 17))])]
+   "optimize_size"
+   [(parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])
+    (parallel [(set (match_dup 0) (mem:DI (reg:DI 7)))
+ 	      (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])]
+   "")
+ 
  ;; Call-value patterns last so that the wildcard operand does not
  ;; disrupt insn-recog's switch tables.
  



More information about the Gcc-patches mailing list