This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

i386 zero_extend patch


Huh, this cost me lots of hours! While hacking on the allocation patches I ran
into problems with random speedups/slowdowns in some tests, especially the byte
benchmark. I spent the last four days hunting them down and then found that the
problem lies in incorrectly modeled zero_extend operations. Those patterns
got split into very slow code (clear destination, movw source, dest)
when source != destination and into fast code otherwise. This caused the
mysterious speedups and slowdowns I was experiencing with my patches.

If I knew who was responsible for this code, I would send him the email bomb!

So here is the fix. It redesigns the move patterns to use separate templates
for TARGET_ZERO_EXTEND_WITH_AND and for the other targets.
It uses stricter constraints so that reload does only the job it can do
correctly, and only the necessary part of it.
It also models the movzx instructions correctly, without a fake clobber of the
flags, and results in a correct length attribute for the "and" operation
(because it is now split in all cases).

Now I can celebrate a bit :) I am getting consistent speedups from the allocation
patches, and gcc now beats MSVC++ on my machine in the byte benchmarks :)
Also, my Pentium/150 is now benchmarked as 2*Pentium/90 in both the integer
and fp benchmarks (it was 1.89 and 1.93).

Honza

Wed Nov 24 16:12:30 MET 1999  Jan Hubicka  <hubicka@freesoft.cz>
	* i386.md (zero_extend?i?i2): Rewrite to expanders; add new patterns;
	rewrite splitters.

Index: egcs/gcc/config/i386/i386.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.md,v
retrieving revision 1.109
diff -c -3 -p -r1.109 i386.md
*** i386.md	1999/11/21 01:34:22	1.109
--- i386.md	1999/11/24 15:04:46
***************
*** 2167,2340 ****
  
  ;; Zero extension instructions
  
! (define_insn "zero_extendhisi2"
!   [(set (match_operand:SI 0 "register_operand" "=r,?r")
!      (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0,rm")))
!    (clobber (reg:CC 17))]
    ""
!   "*
  {
!   switch (get_attr_type (insn))
      {
!     case TYPE_ALU1:
!       if (!REG_P (operands[1]) || REGNO (operands[0]) != REGNO (operands[1]))
! 	abort ();
!       operands[1] = GEN_INT (0xffff);
!       return \"and{l}\\t{%1, %0|%0, %1}\";
!     default:
!       return \"movz{wl|x}\\t{%1, %0|%0, %1}\";
      }
! }"
!   [(set (attr "type")
!      (if_then_else (and (eq_attr "alternative" "0")
! 			(ne (symbol_ref "TARGET_ZERO_EXTEND_WITH_AND")
! 			    (const_int 0)))
!        (const_string "alu1")
!        (const_string "imovx")))])
  
! (define_split
!   [(set (match_operand:SI 0 "register_operand" "")
! 	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))
!    (clobber (reg:CC 17))]
!   "reload_completed
!    && TARGET_ZERO_EXTEND_WITH_AND
!    && !reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(parallel [(set (match_dup 0) (const_int 0))
! 	      (clobber (reg:CC 17))])
!    (set (strict_low_part (subreg:HI (match_dup 0) 0)) (match_dup 1))]
!   "")
  
  (define_split
    [(set (match_operand:SI 0 "register_operand" "")
! 	(zero_extend:SI (match_operand:HI 1 "memory_operand" "")))
     (clobber (reg:CC 17))]
!   "reload_completed
!    && TARGET_ZERO_EXTEND_WITH_AND
!    && reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(set (strict_low_part (subreg:HI (match_dup 0) 0)) (match_dup 1))
!    (parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
  	      (clobber (reg:CC 17))])]
    "")
  
! (define_insn "zero_extendqihi2"
!   [(set (match_operand:HI 0 "register_operand" "=q,r,r")
!      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,0,qm")))
!    (clobber (reg:CC 17))]
    ""
!   "*
! {
!   switch (get_attr_type (insn))
!     {
!     case TYPE_ALU1:
!       if (!REG_P (operands[1]) || REGNO (operands[0]) != REGNO (operands[1]))
! 	abort ();
!       operands[1] = GEN_INT (0xff);
!       return \"and{l}\\t{%1, %k0|%k0, %1}\";
!     default:
!       return \"movz{bw|x}\\t{%1, %0|%0, %1}\";
!     }
! }"
!   [(set (attr "type")
!      (cond [(and (eq_attr "alternative" "0")
! 		 (ne (symbol_ref "TARGET_ZERO_EXTEND_WITH_AND")
! 		     (const_int 0)))
! 	      (const_string "alu1")
! 	    (eq_attr "alternative" "1")
! 	      (const_string "alu1")
! 	   ]
!            (const_string "imovx")))])
  
  (define_split
    [(set (match_operand:HI 0 "register_operand" "")
  	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
     (clobber (reg:CC 17))]
!   "reload_completed
!    && QI_REG_P (operands[0])
!    && TARGET_ZERO_EXTEND_WITH_AND
!    && !reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(parallel [(set (match_dup 0) (const_int 0))
! 	      (clobber (reg:CC 17))])
!    (set (strict_low_part (subreg:QI (match_dup 0) 0)) (match_dup 1))]
!   "")
  
  (define_split
    [(set (match_operand:HI 0 "register_operand" "")
! 	(zero_extend:HI (match_operand:QI 1 "memory_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
     && QI_REG_P (operands[0])
!    && TARGET_ZERO_EXTEND_WITH_AND
!    && reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(set (strict_low_part (subreg:QI (match_dup 0) 0)) (match_dup 1))
!    (parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255)))
! 	      (clobber (reg:CC 17))])]
!   "")
  
  (define_split
    [(set (match_operand:HI 0 "register_operand" "")
  	(zero_extend:HI (match_operand:QI 1 "register_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
!    && TARGET_ZERO_EXTEND_WITH_AND
!    && ! reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(set (match_dup 0) (subreg:HI (match_dup 1) 0))
!    (parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255)))
  	      (clobber (reg:CC 17))])]
    "")
  
! (define_insn "zero_extendqisi2"
!   [(set (match_operand:SI 0 "register_operand" "=q,r,r")
!      (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,0,qm")))
!    (clobber (reg:CC 17))]
    ""
!   "*
! {
!   switch (get_attr_type (insn))
!     {
!     case TYPE_ALU1:
!       if (!REG_P (operands[1]) || REGNO (operands[0]) != REGNO (operands[1]))
! 	abort ();
!       operands[1] = GEN_INT (0xff);
!       return \"and{l}\\t{%1, %0|%0, %1}\";
!     default:
!       return \"movz{bl|x}\\t{%1, %0|%0, %1}\";
!     }
! }"
!   [(set (attr "type")
!      (cond [(and (eq_attr "alternative" "0")
! 		 (ne (symbol_ref "TARGET_ZERO_EXTEND_WITH_AND")
! 		     (const_int 0)))
! 	      (const_string "alu1")
! 	    (eq_attr "alternative" "1")
! 	      (const_string "alu1")
! 	   ]
!            (const_string "imovx")))])
  
  (define_split
    [(set (match_operand:SI 0 "register_operand" "")
  	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
-    && TARGET_ZERO_EXTEND_WITH_AND
     && QI_REG_P (operands[0])
!    && (GET_CODE (operands[1]) == MEM || QI_REG_P (operands[1]))
     && !reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(parallel [(set (match_dup 0) (const_int 0))
! 	      (clobber (reg:CC 17))])
!    (set (strict_low_part (subreg:QI (match_dup 0) 0)) (match_dup 1))]
!   "")
  
  (define_split
    [(set (match_operand:SI 0 "register_operand" "")
  	(zero_extend:SI (match_operand:QI 1 "register_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
!    && TARGET_ZERO_EXTEND_WITH_AND
!    && ! reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(set (match_dup 0) (subreg:SI (match_dup 1) 0))
!    (parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
  	      (clobber (reg:CC 17))])]
    "")
  
  ;; %%% Kill me once multi-word ops are sane.
  (define_insn "zero_extendsidi2"
--- 2167,2349 ----
  
  ;; Zero extension instructions
  
! (define_expand "zero_extendhisi2"
!   [(set (match_operand:SI 0 "register_operand" "")
!      (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
    ""
!   "
  {
!   if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
      {
!       operands[1] = force_reg (HImode, operands[1]);
!       emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
!       DONE;
      }
! }")
  
! (define_insn "zero_extendhisi2_and"
!   [(set (match_operand:SI 0 "register_operand" "=r")
!      (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
!    (clobber (reg:CC 17))]
!   "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
!   "#"
!   [(set_attr "type" "alu1")])
  
  (define_split
    [(set (match_operand:SI 0 "register_operand" "")
! 	(zero_extend:SI (match_operand:HI 1 "register_operand" "")))
     (clobber (reg:CC 17))]
!   "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
!   [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
  	      (clobber (reg:CC 17))])]
    "")
  
! (define_insn "*zero_extendhisi2_movzwl"
!   [(set (match_operand:SI 0 "register_operand" "=r")
!      (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
!   "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
!   "movz{wl|x}\\t{%1, %0|%0, %1}"
!   [(set_attr "type" "imovx")])
! 
! 
! 
! (define_expand "zero_extendqihi2"
!   [(parallel
!     [(set (match_operand:HI 0 "register_operand" "")
!        (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
!      (clobber (reg:CC 17))])]
    ""
!   "")
! 
! (define_insn "*zero_extendqihi2_and"
!   [(set (match_operand:HI 0 "register_operand" "=r,?&q")
!      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
!    (clobber (reg:CC 17))]
!   "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
!   "#"
!   [(set_attr "type" "alu1")])
! 
! (define_insn "*zero_extendqihi2_movzbw_and"
!   [(set (match_operand:HI 0 "register_operand" "=r,r")
!      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
!    (clobber (reg:CC 17))]
!   "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
!   "#"
!   [(set_attr "type" "imovx,alu1")])
! 
! (define_insn "*zero_extendqihi2_movzbw"
!   [(set (match_operand:HI 0 "register_operand" "=r")
!      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
!   "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
!   "movz{bw|x}\\t{%1, %0|%0, %1}"
!   [(set_attr "type" "imovx")])
  
+ ;; For the movzbw case strip only the clobber
  (define_split
    [(set (match_operand:HI 0 "register_operand" "")
  	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
     (clobber (reg:CC 17))]
!   "reload_completed 
!    && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size)
!    && (!REG_P (operands[1]) || QI_REG_P (operands[1]))"
!   [(set (match_operand:HI 0 "register_operand" "")
! 	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))])
  
+ ;; When source and destination does not overlap, clear destination
+ ;; first and then do the movb
  (define_split
    [(set (match_operand:HI 0 "register_operand" "")
! 	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
     && QI_REG_P (operands[0])
!    && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
!    && !reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(set (match_dup 0) (const_int 0))
!    (set (strict_low_part (match_dup 2)) (match_dup 1))]
!   "operands[2] = gen_lowpart (QImode, operands[0]);")
  
+ ;; Rest is handled by single and.
  (define_split
    [(set (match_operand:HI 0 "register_operand" "")
  	(zero_extend:HI (match_operand:QI 1 "register_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
!    && true_regnum (operands[0]) == true_regnum (operands[1])"
!   [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255)))
  	      (clobber (reg:CC 17))])]
    "")
  
! 
! 
! (define_expand "zero_extendqisi2"
!   [(parallel
!     [(set (match_operand:SI 0 "register_operand" "")
!        (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
!      (clobber (reg:CC 17))])]
    ""
!   "")
! 
! (define_insn "*zero_extendqisi2_and"
!   [(set (match_operand:SI 0 "register_operand" "=r,?&q")
!      (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
!    (clobber (reg:CC 17))]
!   "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
!   "#"
!   [(set_attr "type" "alu1")])
! 
! (define_insn "*zero_extendqisi2_movzbw_and"
!   [(set (match_operand:SI 0 "register_operand" "=r,r")
!      (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
!    (clobber (reg:CC 17))]
!   "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
!   "#"
!   [(set_attr "type" "imovx,alu1")])
! 
! (define_insn "*zero_extendqisi2_movzbw"
!   [(set (match_operand:SI 0 "register_operand" "=r")
!      (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
!   "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
!   "movz{bl|x}\\t{%1, %0|%0, %1}"
!   [(set_attr "type" "imovx")])
! 
! ;; For the movzbl case strip only the clobber
! (define_split
!   [(set (match_operand:SI 0 "register_operand" "")
! 	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
!    (clobber (reg:CC 17))]
!   "reload_completed 
!    && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size)
!    && (!REG_P (operands[1]) || QI_REG_P (operands[1]))"
!   [(set (match_dup 0)
! 	(zero_extend:SI (match_dup 1)))])
  
+ ;; When source and destination does not overlap, clear destination
+ ;; first and then do the movb
  (define_split
    [(set (match_operand:SI 0 "register_operand" "")
  	(zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
     && QI_REG_P (operands[0])
!    && (QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)
!    && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
     && !reg_overlap_mentioned_p (operands[0], operands[1])"
!   [(set (match_dup 0) (const_int 0))
!    (set (strict_low_part (match_dup 2)) (match_dup 1))]
!   "operands[2] = gen_lowpart (QImode, operands[0]);")
  
+ ;; Rest is handled by single and.
  (define_split
    [(set (match_operand:SI 0 "register_operand" "")
  	(zero_extend:SI (match_operand:QI 1 "register_operand" "")))
     (clobber (reg:CC 17))]
    "reload_completed
!    && true_regnum (operands[0]) == true_regnum (operands[1])"
!   [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
  	      (clobber (reg:CC 17))])]
    "")
+ 
  
  ;; %%% Kill me once multi-word ops are sane.
  (define_insn "zero_extendsidi2"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]