This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Committed: fix PR target/31101


Hello!

This bug was caused by a somewhat incorrect RTL description of the fpremxf4_i387 and fprem1xf4_i387 insn patterns. This caused wrong CSE when -funroll-all-loops unrolled the reduction loop a couple of times.

The attached patch also adds LABEL_NUSES to the emitted labels and adds a branch probability of 0.1 to the emitted jump instructions. The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu, and was committed to SVN mainline.


2007-03-10 Uros Bizjak <ubizjak@gmail.com>


       PR target/31101
       * config/i386/i386.md (UNSPEC_C2_FLAG): New constant.
       (fpremxf4_i387, fprem1xf4_i387): Use UNSPEC_C2_FLAG.
       (fmodxf3, fmod<mode>3, remainderxf3, remainder<mode>3):
       Add LABEL_NUSES to emitted label.
       * config/i386/i386.c (ix86_emit_fp_unordered_jump): Add
       branch probability value to emitted jump insn.
       * reg-stack.c (subst_stack_regs_pat)[UNSPEC]: Handle UNSPEC_C2_FLAG.
       Do not check life information and do not re-arrange input operands
       for UNSPEC_FSCALE_EXP, UNSPEC_FPREM_U and UNSPEC_FPREM1_U.

Uros.
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 122792)
+++ ChangeLog	(working copy)
@@ -1,3 +1,17 @@
+2007-03-10  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/31101
+	* config/i386/i386.md (UNSPEC_C2_FLAG): New constant.
+	(fpremxf4_i387, fprem1xf4_i387): Use UNSPEC_C2_FLAG.
+	(fmodxf3, fmod<mode>3, remainderxf3, remainder<mode>3):
+	Add LABEL_NUSES to emmitted label.
+	* config/i386/i386.c (ix86_emit_fp_unordered_jump): Add
+	branch probability value to emmitted jump insn.
+	* reg-stack.c (subst_stack_regs_pat)[UNSPEC]: Handle UNSPEC_C2_FLAG.
+	Do not check life information and do not re-arrange input operands
+	for UNSPEC_FSCALE_EXP, UNSPEC_FPREM_U and UNSPEC_FPREM1_U.
+
+
 2007-03-10  Kaz Kojima  <kkojima@gcc.gnu.org>
 
 	* config/sh/sh.c (sh_insn_length_adjustment): Adjust for
Index: reg-stack.c
===================================================================
--- reg-stack.c	(revision 122792)
+++ reg-stack.c	(working copy)
@@ -1759,7 +1759,7 @@
 	      case UNSPEC_FSCALE_FRACT:
 	      case UNSPEC_FPREM_F:
 	      case UNSPEC_FPREM1_F:
-		/* These insns operate on the top two stack slots.
+		/* These insns operate on the top two stack slots,
 		   first part of double input, double output insn.  */
 
 		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
@@ -1791,22 +1791,12 @@
 	      case UNSPEC_FSCALE_EXP:
 	      case UNSPEC_FPREM_U:
 	      case UNSPEC_FPREM1_U:
-		/* These insns operate on the top two stack slots./
+		/* These insns operate on the top two stack slots,
 		   second part of double input, double output insn.  */
 
 		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
 		src2 = get_true_reg (&XVECEXP (pat_src, 0, 1));
 
-		src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
-		src2_note = find_regno_note (insn, REG_DEAD, REGNO (*src2));
-
-		/* Inputs should never die, they are
-		   replaced with outputs.  */
-		gcc_assert (!src1_note);
-		gcc_assert (!src2_note);
-
-		swap_to_top (insn, regstack, *src1, *src2);
-
 		/* Push the result back onto stack. Fill empty slot from
 		   first part of insn and fix top of stack pointer.  */
 		if (STACK_REG_P (*dest))
@@ -1820,6 +1810,17 @@
 		replace_reg (src2, FIRST_STACK_REG + 1);
 		break;
 
+	      case UNSPEC_C2_FLAG:
+		/* This insn operates on the top two stack slots,
+		   third part of C2 setting double input insn.  */
+
+		src1 = get_true_reg (&XVECEXP (pat_src, 0, 0));
+		src2 = get_true_reg (&XVECEXP (pat_src, 0, 1));
+
+		replace_reg (src1, FIRST_STACK_REG);
+		replace_reg (src2, FIRST_STACK_REG + 1);
+		break;
+
 	      case UNSPEC_SAHF:
 		/* (unspec [(unspec [(compare)] UNSPEC_FNSTSW)] UNSPEC_SAHF)
 		   The combination matches the PPRO fcomi instruction.  */
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 122792)
+++ config/i386/i386.c	(working copy)
@@ -21032,7 +21032,9 @@
 			      gen_rtx_LABEL_REF (VOIDmode, label),
 			      pc_rtx);
   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+
   emit_jump_insn (temp);
+  predict_jump (REG_BR_PROB_BASE * 10 / 100);
 }
 
 /* Output code to perform a log1p XFmode calculation.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 122792)
+++ config/i386/i386.md	(working copy)
@@ -144,6 +144,8 @@
    (UNSPEC_FPREM1_F		90)
    (UNSPEC_FPREM1_U		91)
 
+   (UNSPEC_C2_FLAG		95)
+
    ; SSP patterns
    (UNSPEC_SP_SET		100)
    (UNSPEC_SP_TEST		101)
@@ -16315,7 +16317,8 @@
 	(unspec:XF [(match_dup 2) (match_dup 3)]
 		   UNSPEC_FPREM_U))
    (set (reg:CCFP FPSR_REG)
-	(unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+	(unspec:CCFP [(match_dup 2) (match_dup 3)]
+		     UNSPEC_C2_FLAG))]
   "TARGET_USE_FANCY_MATH_387"
   "fprem"
   [(set_attr "type" "fpspc")
@@ -16334,6 +16337,7 @@
   emit_insn (gen_fpremxf4_i387 (operands[1], operands[2],
 				operands[1], operands[2]));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   emit_move_insn (operands[0], operands[1]);
   DONE;
@@ -16356,6 +16360,7 @@
   emit_label (label);
   emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   /* Truncate the result properly for strict SSE math.  */
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
@@ -16376,7 +16381,8 @@
 	(unspec:XF [(match_dup 2) (match_dup 3)]
 		   UNSPEC_FPREM1_U))
    (set (reg:CCFP FPSR_REG)
-	(unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+	(unspec:CCFP [(match_dup 2) (match_dup 3)]
+		     UNSPEC_C2_FLAG))]
   "TARGET_USE_FANCY_MATH_387"
   "fprem1"
   [(set_attr "type" "fpspc")
@@ -16395,6 +16401,7 @@
   emit_insn (gen_fprem1xf4_i387 (operands[1], operands[2],
 				 operands[1], operands[2]));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   emit_move_insn (operands[0], operands[1]);
   DONE;
@@ -16418,6 +16425,7 @@
 
   emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   /* Truncate the result properly for strict SSE math.  */
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]