This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

I386 HI to SI mode promoting revamp


Hi
The HI to SI mode promoting code has some glitches. It doesn't handle some
instructions (such as shifts, muls, negs, tests, etc.) and has some
desynchronization in it.

This patch is an attempt to make it more maintainable. I've decided to put all
the splitters in a single place near the peephole2 optimizations. That place
contains many patterns that need to be verified whenever a new pattern is added
to i386.md, so it is probably easier to keep them all up to date there.
I've also removed the redundant splitters for each type of instruction and
added a "promotable_binary_operator" predicate to handle this.

I've also implemented promoting QI to SI mode. This is a small (but cheap)
size optimization (because SImode instructions sometimes have short forms);
I am getting 0.5% improvements on cc1. Also, SImode operations are faster on
the K6 CPU.

It also makes it possible to implement peep2's that split read-modify
operations into an HImode load and a promoted SImode operation.

All these changes together add about a 5% speedup to the integer index of the
byte benchmark, which makes heavy use of HImode.

Honza

Čt prosinec 16 07:52:30 CET 1999  Jan Hubicka  <hubicka@freesoft.cz>

	* i386.md (HI to SImode promoting splitters): Rewrite.
	(pushsf mem peep2): New.
	(testhi to andhi peep2): Remove.
	* i386.h (x86_promote_QImode): New.
	(TARGET_PROMOTE_QImode): New.
	(PREDICATE_CODES): Add promotable_binary_operator.
	* i386.c (x86_promote_QImode): New.
	(promotable_binary_operator): New.
	* i386-protos.h (promotable_binary_operator): New.

*** i386.md.nos	Wed Dec 15 19:56:53 1999
--- i386.md	Wed Dec 15 23:10:44 1999
***************
*** 3275,3293 ****
  	(const_string "incdec")
  	(const_string "alu")))])
  
- ;; If we know we're not touching memory, promote HImode references to SImode.
- (define_split
-   [(set (match_operand:HI 0 "register_operand" "")
- 	(plus:HI (match_operand:HI 1 "register_operand" "")
- 		 (match_operand:HI 2 "nonmemory_operand" "")))
-    (clobber (reg:CC 17))]
-   "! TARGET_PARTIAL_REG_STALL && reload_completed"
-   [(parallel [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))
- 	      (clobber (reg:CC 17))])]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[2] = gen_lowpart (SImode, operands[2]);")
- 
  (define_insn "*addhi_2"
    [(set (reg:CCNO 17)
  	(compare:CCNO
--- 3275,3280 ----
***************
*** 4387,4404 ****
  }"
    [(set_attr "type" "alu,alu,imovx")])
  
- (define_split
-   [(set (match_operand:HI 0 "register_operand" "")
- 	(and:HI (match_operand:HI 1 "register_operand" "")
- 		(match_operand:HI 2 "nonmemory_operand" "")))
-    (clobber (reg:CC 17))]
-   "! TARGET_PARTIAL_REG_STALL && reload_completed"
-   [(parallel [(set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))
- 	      (clobber (reg:CC 17))])]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[2] = gen_lowpart (SImode, operands[2]);")
- 
  (define_insn "*andhi_2"
    [(set (reg:CCNO 17)
  	(compare:CCNO (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
--- 4374,4379 ----
***************
*** 4410,4432 ****
    "and{w}\\t{%2, %0|%0, %2}"
    [(set_attr "type" "alu")])
  
- (define_split
-   [(set (reg:CCNO 17)
- 	(compare:CCNO (and:HI (match_operand:HI 1 "register_operand" "")
- 			      (match_operand:HI 2 "immediate_operand" ""))
- 		      (const_int 0)))
-    (set (match_operand:HI 0 "register_operand" "")
- 	(and:HI (match_dup 1) (match_dup 2)))]
-   "! TARGET_PARTIAL_REG_STALL && reload_completed"
-   [(parallel [(set (reg:CCNO 17)
- 		   (compare:CCNO (and:SI (match_dup 1) (match_dup 2))
- 			         (const_int 0)))
- 	      (set (match_dup 0)
- 		   (and:SI (match_dup 1) (match_dup 2)))])]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[2] = gen_lowpart (SImode, operands[2]);")
- 
  (define_expand "andqi3"
    [(set (match_operand:QI 0 "nonimmediate_operand" "")
  	(and:QI (match_operand:QI 1 "nonimmediate_operand" "")
--- 4385,4390 ----
***************
*** 4592,4609 ****
    "or{w}\\t{%2, %0|%0, %2}"
    [(set_attr "type" "alu")])
  
- (define_split
-   [(set (match_operand:HI 0 "register_operand" "")
- 	(ior:HI (match_operand:HI 1 "register_operand" "")
- 		(match_operand:HI 2 "nonmemory_operand" "")))
-    (clobber (reg:CC 17))]
-   "! TARGET_PARTIAL_REG_STALL && reload_completed"
-   [(parallel [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2)))
- 	      (clobber (reg:CC 17))])]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[2] = gen_lowpart (SImode, operands[2]);")
- 
  (define_insn "*iorhi_2"
    [(set (reg:CCNO 17)
  	(compare:CCNO (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
--- 4550,4555 ----
***************
*** 4697,4714 ****
    "xor{w}\\t{%2, %0|%0, %2}"
    [(set_attr "type" "alu")])
  
- (define_split
-   [(set (match_operand:HI 0 "register_operand" "")
- 	(xor:HI (match_operand:HI 1 "register_operand" "")
- 		(match_operand:HI 2 "nonmemory_operand" "")))
-    (clobber (reg:CC 17))]
-   "! TARGET_PARTIAL_REG_STALL && reload_completed"
-   [(parallel [(set (match_dup 0) (xor:SI (match_dup 1) (match_dup 2)))
- 	      (clobber (reg:CC 17))])]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[2] = gen_lowpart (SImode, operands[2]);")
- 
  (define_insn "*xorhi_2"
    [(set (reg:CCNO 17)
  	(compare:CCNO (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
--- 4643,4648 ----
***************
*** 5298,5333 ****
    "")
  
  (define_expand "one_cmplhi2"
    [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
  		   (not:HI (match_operand:HI 1 "general_operand" "")))
  	      (clobber (reg:CC 17))])]
    ""
    "ix86_expand_unary_operator (NOT, HImode, operands); DONE;")
  
  (define_insn "*one_cmplhi2_1"
    [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
  	(not:HI (match_operand:HI 1 "nonimmediate_operand" "0")))]
    "ix86_unary_operator_ok (NEG, HImode, operands)"
    "not{w}\\t%0"
    [(set_attr "type" "negnot")])
  
- (define_split
-   [(set (match_operand:HI 0 "register_operand" "")
- 	(not:HI (match_operand:HI 1 "register_operand" "")))
-    (clobber (reg:CC 17))]
-   "! TARGET_PARTIAL_REG_STALL && reload_completed"
-   [(parallel [(set (match_dup 0) (not:SI (match_dup 1)))
- 	      (clobber (reg:CC 17))])]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[1] = gen_lowpart (SImode, operands[1]);")
- 
  (define_insn "*one_cmplhi2_2"
    [(set (reg:CCNO 17)
  	(compare:CCNO (not:HI (match_operand:HI 1 "nonimmediate_operand" "0"))
  		      (const_int 0)))
     (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
  	(not:HI (match_dup 1)))]
    "ix86_unary_operator_ok (NEG, HImode, operands)"
    "#"
    [(set_attr "type" "alu1")])
  
--- 5231,5255 ----
***************
*** 5661,5677 ****
  	   ]
  	   (const_string "ishift")))])
  
- (define_split
-   [(set (match_operand:HI 0 "register_operand" "")
- 	(ashift:HI (match_operand:HI 1 "register_operand" "")
- 		   (match_operand:QI 2 "nonmemory_operand" "")))
-    (clobber (reg:CC 17))]
-   "! TARGET_PARTIAL_REG_STALL && reload_completed"
-   [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
- 	      (clobber (reg:CC 17))])]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[1] = gen_lowpart (SImode, operands[1]);")
- 
  (define_insn "*ashlhi3_cmpno"
    [(set (reg:CCNO 17)
  	(compare:CCNO
--- 5582,5587 ----
***************
*** 8211,8229 ****
     cmov%c1\\t{%3, %0|%0, %3}"
    [(set_attr "type" "icmov")])
  
- (define_split 
-   [(set (match_operand:HI 0 "register_operand" "")
- 	(if_then_else:HI (match_operator 1 "comparison_operator" 
- 				[(reg 17) (const_int 0)])
- 		      (match_operand:HI 2 "register_operand" "")
- 		      (match_operand:HI 3 "register_operand" "")))]
-   "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE"
-   [(set (match_dup 0)
- 	(if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
-   "operands[0] = gen_lowpart (SImode, operands[0]);
-    operands[2] = gen_lowpart (SImode, operands[2]);
-    operands[3] = gen_lowpart (SImode, operands[3]);")
- 			
  (define_expand "movsfcc"
    [(set (match_operand:SF 0 "register_operand" "")
  	(if_then_else:SF (match_operand 1 "comparison_operator" "")
--- 8121,8126 ----
***************
*** 8370,8375 ****
--- 8267,8373 ----
    DONE;
  }")
  
+ ;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
+ 
+ (define_split
+   [(set (match_operand 0 "register_operand" "")
+ 	(match_operator 3 "promotable_binary_operator"
+ 	   [(match_operand 1 "register_operand" "")
+ 	    (match_operand 2 "nonmemory_operand" "")]))
+    (clobber (reg:CC 17))]
+   "! TARGET_PARTIAL_REG_STALL && reload_completed
+    && ((GET_MODE (operands[0]) == HImode 
+ 	&& (!optimize_size || GET_CODE (operands[2]) != CONST_INT
+ 	    || CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')))
+        || (GET_MODE (operands[0]) == QImode 
+ 	   && (TARGET_PROMOTE_QImode || optimize_size)))"
+   [(parallel [(set (match_dup 0)
+ 		   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ 	      (clobber (reg:CC 17))])]
+   "operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    if (GET_CODE (operands[3]) != ASHIFT)
+      operands[2] = gen_lowpart (SImode, operands[2]);
+    GET_MODE (operands[3]) = SImode;")
+ 
+ (define_split
+   [(set (reg:CCNO 17)
+ 	(compare:CCNO (and (match_operand 1 "register_operand" "")
+ 			   (match_operand 2 "immediate_operand" ""))
+ 		      (const_int 0)))
+    (set (match_operand 0 "register_operand" "")
+ 	(and (match_dup 1) (match_dup 2)))]
+   "! TARGET_PARTIAL_REG_STALL && reload_completed
+    && (GET_MODE (operands[0]) == HImode
+        || (GET_MODE (operands[0]) == QImode 
+ 	   && (TARGET_PROMOTE_QImode || optimize_size)))"
+   [(parallel [(set (reg:CCNO 17)
+ 		   (compare:CCNO (and:SI (match_dup 1) (match_dup 2))
+ 			         (const_int 0)))
+ 	      (set (match_dup 0)
+ 		   (and:SI (match_dup 1) (match_dup 2)))])]
+   "operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[2] = gen_lowpart (SImode, operands[2]);")
+ 
+ (define_split
+   [(set (reg:CCNO 17)
+ 	(compare:CCNO (and (match_operand 0 "register_operand" "")
+ 			   (match_operand 1 "immediate_operand" ""))
+ 		      (const_int 0)))]
+   "! TARGET_PARTIAL_REG_STALL && reload_completed
+    && (GET_MODE (operands[0]) == HImode
+        || (GET_MODE (operands[0]) == QImode 
+ 	   && (TARGET_PROMOTE_QImode || optimize_size)))"
+   [(set (reg:CCNO 17)
+ 	(compare:CCNO (and:SI (match_dup 0) (match_dup 1))
+ 		      (const_int 0)))]
+   "operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);")
+ 
+ (define_split
+   [(set (match_operand 0 "register_operand" "")
+ 	(neg (match_operand 1 "register_operand" "")))
+    (clobber (reg:CC 17))]
+   "! TARGET_PARTIAL_REG_STALL && reload_completed
+    && (GET_MODE (operands[0]) == HImode
+        || (GET_MODE (operands[0]) == QImode 
+ 	   && (TARGET_PROMOTE_QImode || optimize_size)))"
+   [(parallel [(set (match_dup 0)
+ 		   (neg:SI (match_dup 1)))
+ 	      (clobber (reg:CC 17))])]
+   "operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);")
+ 
+ (define_split
+   [(set (match_operand 0 "register_operand" "")
+ 	(not (match_operand 1 "register_operand" "")))]
+   "! TARGET_PARTIAL_REG_STALL && reload_completed
+    && (GET_MODE (operands[0]) == HImode
+        || (GET_MODE (operands[0]) == QImode 
+ 	   && (TARGET_PROMOTE_QImode || optimize_size)))"
+   [(set (match_dup 0)
+ 	(not:SI (match_dup 1)))]
+   "operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[1] = gen_lowpart (SImode, operands[1]);")
+ 
+ (define_split 
+   [(set (match_operand 0 "register_operand" "")
+ 	(if_then_else (match_operator 1 "comparison_operator" 
+ 				[(reg 17) (const_int 0)])
+ 		      (match_operand 2 "register_operand" "")
+ 		      (match_operand 3 "register_operand" "")))]
+   "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE
+    && (GET_MODE (operands[0]) == HImode
+        || (GET_MODE (operands[0]) == QImode 
+ 	   && (TARGET_PROMOTE_QImode || optimize_size)))"
+   [(set (match_dup 0)
+ 	(if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+   "operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+    operands[3] = gen_lowpart (SImode, operands[3]);")
+ 			
+ 
  ;; RTL Peephole optimizations, run before sched2.  These primarily look to
  ;; transform a complex memory operation into two memory to register operations.
  
***************
*** 8383,8388 ****
--- 8381,8397 ----
     (set (match_dup 0) (match_dup 2))]
    "")
  
+ ;; We need to handle SFmode only, because DFmode and XFmode is split to
+ ;; SImode pushes.
+ (define_peephole2
+   [(set (match_operand:SF 0 "push_operand" "")
+ 	(match_operand:SF 1 "memory_operand" ""))
+    (match_scratch:SF 2 "r")]
+   "! optimize_size && ! TARGET_PUSH_MEMORY"
+   [(set (match_dup 2) (match_dup 1))
+    (set (match_dup 0) (match_dup 2))]
+   "")
+ 
  (define_peephole2
    [(set (match_operand:HI 0 "push_operand" "")
  	(match_operand:HI 1 "memory_operand" ""))
***************
*** 8555,8577 ****
  	   (and:SI (match_dup 0) (match_dup 1)))])]
    "")
  
! (define_peephole2
!   [(set (reg:CCNO 17)
! 	(compare:CCNO (and:HI (match_operand:HI 0 "register_operand" "")
! 			      (match_operand:HI 1 "immediate_operand" ""))
! 		      (const_int 0)))]
!   "! TARGET_PARTIAL_REG_STALL
!    && (true_regnum (operands[0]) != 0
!        || CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K'))
!    && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
!   [(parallel
!      [(set (reg:CCNO 17)
! 	   (compare:CCNO (and:HI (match_dup 0)
! 			         (match_dup 1))
! 		         (const_int 0)))
!       (set (match_dup 0)
! 	   (and:HI (match_dup 0) (match_dup 1)))])]
!   "")
  
  (define_peephole2
    [(set (reg:CCNO 17)
--- 8564,8571 ----
  	   (and:SI (match_dup 0) (match_dup 1)))])]
    "")
  
! ;; We don't need to handle HImode case, because it will be promoted to SImode
! ;; on ! TARGET_PARTIAL_REG_STALL
  
  (define_peephole2
    [(set (reg:CCNO 17)
*** i386.h.nos	Wed Dec 15 19:56:56 1999
--- i386.h	Wed Dec 15 22:48:47 1999
*************** extern const int x86_double_with_add, x8
*** 162,167 ****
--- 162,168 ----
  extern const int x86_use_loop, x86_use_fiop, x86_use_mov0;
  extern const int x86_use_cltd, x86_read_modify_write;
  extern const int x86_read_modify, x86_split_long_moves;
+ extern const int x86_promote_QImode;
  
  #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
  #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
*************** extern const int x86_read_modify, x86_sp
*** 183,188 ****
--- 184,190 ----
  #define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & CPUMASK)
  #define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & CPUMASK)
  #define TARGET_READ_MODIFY (x86_read_modify & CPUMASK)
+ #define TARGET_PROMOTE_QImode (x86_promote_QImode & CPUMASK)
  
  #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
  
*************** do { long l;						\
*** 2444,2449 ****
--- 2446,2452 ----
  				 UMIN, UMAX, COMPARE, MINUS, DIV, MOD,	\
  				 UDIV, UMOD, ASHIFT, ROTATE, ASHIFTRT,	\
  				 LSHIFTRT, ROTATERT}},			\
+   {"promotable_binary_operator", {PLUS, MULT, AND, IOR, XOR, ASHIFT}},	\
    {"memory_displacement_operand", {MEM}},				\
    {"cmpsi_operand", {CONST_INT, CONST_DOUBLE, CONST, SYMBOL_REF,	\
  		     LABEL_REF, SUBREG, REG, MEM, AND}},		\
*** i386.c.nos	Wed Dec 15 19:57:08 1999
--- i386.c	Wed Dec 15 22:54:31 1999
*************** const int x86_use_cltd = ~(m_PENT | m_K6
*** 194,199 ****
--- 194,200 ----
  const int x86_read_modify_write = ~m_PENT;
  const int x86_read_modify = ~(m_PENT | m_PPRO);
  const int x86_split_long_moves = m_PPRO;
+ const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
  
  #define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))
  
*************** fcmov_comparison_operator (op, mode)
*** 1160,1165 ****
--- 1161,1190 ----
    return ((mode == VOIDmode || GET_MODE (op) == mode)
  	  && GET_RTX_CLASS (GET_CODE (op)) == '<'
  	  && GET_CODE (op) == unsigned_condition (GET_CODE (op)));
+ }
+ 
+ /* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
+ 
+ int
+ promotable_binary_operator (op, mode)
+      register rtx op;
+      enum machine_mode mode ATTRIBUTE_UNUSED;
+ {
+   switch (GET_CODE (op))
+     {
+     case MULT:
+       /* Modern CPUs have same latency for HImode and SImode multiply,
+          but 386 and 486 do HImode multiply faster.  */
+       return ix86_cpu > PROCESSOR_I486;
+     case PLUS:
+     case AND:
+     case IOR:
+     case XOR:
+     case ASHIFT:
+       return 1;
+     default:
+       return 0;
+     }
  }
  
  /* Nearly general operand, but accept any const_double, since we wish
*** i386-protos.h.nos	Wed Dec 15 19:57:03 1999
--- i386-protos.h	Wed Dec 15 22:34:56 1999
*************** extern int binary_fp_operator PROTO((rtx
*** 60,65 ****
--- 60,66 ----
  extern int mult_operator PROTO((rtx, enum machine_mode));
  extern int div_operator PROTO((rtx, enum machine_mode));
  extern int arith_or_logical_operator PROTO((rtx, enum machine_mode));
+ extern int promotable_binary_operator PROTO((rtx, enum machine_mode));
  extern int memory_displacement_operand PROTO((rtx, enum machine_mode));
  extern int cmpsi_operand PROTO((rtx, enum machine_mode));
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]