This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, take 2] Fix PR target/28946


On Tue, Sep 05, 2006 at 09:32:32PM +0200, Uros Bizjak wrote:
> 
> Attached patch should implement missing patterns for all shift 
> instructions (wheee...). It is currently bootstrapping on x86_64. A 
> regression-test suite will run over night for c, c++ and fortran.
> 
> 2006-09-06 Uros Bizjak <uros@kss-loka.si>
> 
>        PR target/28946
>        * config/i386/i386.md ("*ashldi3_cconly_rex64", "*ashlsi3_cconly",
>        "*ashlhi3_cconly", "*ashlqi3_cconly", 
> "*ashrdi3_one_bit_cconly_rex64",
>        "*ashrdi3_cconly_rex64", "*ashrsi3_one_bit_cconly", 
> "*ashrsi3_cconly",
>        "*ashrhi3_one_bit_cconly", "*ashrhi3_cconly",
>        "*ashrqi3_one_bit_cconly", "*ashrqi3_cconly",
>        "*lshrdi3_cconly_one_bit_rex64", "*lshrdi3_cconly_rex64",
>        "*lshrsi3_one_bit_cconly", "*lshrsi3_cconly",
>        "*lshrhi3_one_bit_cconly", "*lshrhi3_cconly",
>        "*lshrqi2_one_bit_cconly", "*lshrqi2_cconly": New patterns to
>        implement only CC setting effects of shift instructions.
> 

Core and Core 2 suffer from partial flag register stalls. The penalty of
a partial flag register stall is 60-100%. The cost of the extra test
instruction on Pentium 4 is 0-10%.

A partial flag register stall happens when:

.	an instruction modifies a part of the flag register, and
.	the following instruction is dependent on the outcome of these
	flags.

Partial flag register stalls occur most often with the shift
instructions (sar, sal, shr, shl), which do not modify the flags in the
case of zero shift count.  The shift count is known only at execution
time and therefore the front-end stalls until the instruction is
retired.

To avoid this stall, do not use flags immediately after an instruction
that may update the flags. Insert a test or compare instruction between
the instruction that may set the flags and the instruction that uses
them.

This follow-up patch disables the shift patterns that set the flags
register, in order to avoid partial flag register stalls.


H.J.
----
2006-09-05  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.c (x86_partial_flag_reg_stall): New.

	* config/i386/i386.h (x86_partial_flag_reg_stall): New.
	(TARGET_PARTIAL_FLAG_REG_STALL): New.

	* config/i386/i386.md (*ashldi3_cmp_rex64): Disabled for
	TARGET_PARTIAL_FLAG_REG_STALL.
	(*ashldi3_cconly_rex64): Likewise.
	(*ashlsi3_cmp): Likewise.
	(*ashlsi3_cconly): Likewise.
	(*ashlsi3_cmp_zext): Likewise.
	(*ashlhi3_cmp): Likewise.
	(*ashlhi3_cconly): Likewise.
	(*ashlqi3_cmp): Likewise.
	(*ashlqi3_cconly): Likewise.
	(*ashrdi3_cmp_rex64): Likewise.
	(*ashrdi3_cconly_rex64): Likewise.
	(*ashrsi3_cmp): Likewise.
	(*ashrsi3_cconly): Likewise.
	(*ashrsi3_cmp_zext): Likewise.
	(*ashrhi3_cmp): Likewise.
	(*ashrhi3_cconly): Likewise.
	(*ashrqi3_cmp): Likewise.
	(*ashrqi3_cconly): Likewise.
	(*lshrdi3_cmp_rex64): Likewise.
	(*lshrdi3_cconly_rex64): Likewise.
	(*lshrsi3_cmp): Likewise.
	(*lshrsi3_cconly): Likewise.
	(*lshrsi3_cmp_zext): Likewise.
	(*lshrhi3_cmp): Likewise.
	(*lshrhi3_cconly): Likewise.
	(*lshrqi2_cmp): Likewise.
	(*lshrqi2_cconly): Likewise.

--- gcc/config/i386/i386.c.pfrs	2006-09-06 18:03:55.000000000 -0700
+++ gcc/config/i386/i386.c	2006-09-06 18:09:55.000000000 -0700
@@ -761,6 +761,7 @@ const int x86_use_sahf = m_PPRO | m_K6 |
    with partial reg. dependencies used by Athlon/P4 based chips, it is better
    to leave it off for generic32 for now.  */
 const int x86_partial_reg_stall = m_PPRO;
+const int x86_partial_flag_reg_stall = m_GENERIC;
 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
 const int x86_use_mov0 = m_K6;
--- gcc/config/i386/i386.h.pfrs	2006-09-06 18:03:55.000000000 -0700
+++ gcc/config/i386/i386.h	2006-09-06 18:03:55.000000000 -0700
@@ -182,6 +182,10 @@ extern int x86_prefetch_sse;
 #define TARGET_USE_SAHF ((x86_use_sahf & TUNEMASK) && !TARGET_64BIT)
 #define TARGET_MOVX (x86_movx & TUNEMASK)
 #define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & TUNEMASK)
+
+extern const int x86_partial_flag_reg_stall;
+#define TARGET_PARTIAL_FLAG_REG_STALL (x86_partial_flag_reg_stall & TUNEMASK)
+
 #define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & TUNEMASK)
 #define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & TUNEMASK)
 #define TARGET_USE_MOV0 (x86_use_mov0 & TUNEMASK)
--- gcc/config/i386/i386.md.pfrs	2006-09-06 18:03:55.000000000 -0700
+++ gcc/config/i386/i386.md	2006-09-06 18:03:56.000000000 -0700
@@ -10440,7 +10440,12 @@
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(ashift:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
 {
   switch (get_attr_type (insn))
     {
@@ -10476,7 +10481,12 @@
 	  (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))"
 {
   switch (get_attr_type (insn))
     {
@@ -10757,7 +10767,12 @@
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
 	(ashift:SI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
 {
   switch (get_attr_type (insn))
     {
@@ -10793,7 +10808,12 @@
 	  (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))"
 {
   switch (get_attr_type (insn))
     {
@@ -10830,7 +10850,12 @@
    (set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))"
 {
   switch (get_attr_type (insn))
     {
@@ -10949,7 +10974,12 @@
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(ashift:HI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
 {
   switch (get_attr_type (insn))
     {
@@ -10985,7 +11015,12 @@
 	  (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))"
 {
   switch (get_attr_type (insn))
     {
@@ -11143,7 +11178,12 @@
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
 	(ashift:QI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
 {
   switch (get_attr_type (insn))
     {
@@ -11179,7 +11219,12 @@
 	  (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+	   && (TARGET_SHIFT1
+	       || TARGET_DOUBLE_WITH_ADD)))"
 {
   switch (get_attr_type (insn))
     {
@@ -11376,7 +11421,9 @@
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(ashiftrt:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
@@ -11389,7 +11436,9 @@
 	  (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
@@ -11623,7 +11672,9 @@
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
 	(ashiftrt:SI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
@@ -11636,7 +11687,9 @@
 	  (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
@@ -11650,7 +11703,9 @@
    (set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
@@ -11736,7 +11791,9 @@
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(ashiftrt:HI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
@@ -11749,7 +11806,9 @@
 	  (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
@@ -11863,7 +11922,9 @@
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
 	(ashiftrt:QI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
@@ -11876,7 +11937,9 @@
 	  (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "sar{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
@@ -12020,7 +12083,9 @@
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(lshiftrt:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
@@ -12033,7 +12098,9 @@
 	  (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
@@ -12191,7 +12258,9 @@
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
 	(lshiftrt:SI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
@@ -12204,7 +12273,9 @@
         (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
@@ -12218,7 +12289,9 @@
    (set (match_operand:DI 0 "register_operand" "=r")
 	(lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
@@ -12304,7 +12377,9 @@
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(lshiftrt:HI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
@@ -12317,7 +12392,9 @@
 	  (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
@@ -12430,7 +12507,9 @@
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
 	(lshiftrt:QI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
@@ -12443,7 +12522,9 @@
 	  (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
   "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
+   && (optimize_size
+       || !TARGET_PARTIAL_FLAG_REG_STALL)"
   "shr{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]