This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[mips patch RFA] add -mfix-sb1 to work around SB-1 CPU errata.

From: cgd at broadcom dot com
To: gcc-patches at gcc dot gnu dot org
Cc: echristo at redhat dot com
Date: 21 Aug 2003 13:34:13 -0700
Subject: [mips patch RFA] add -mfix-sb1 to work around SB-1 CPU errata.
This patch adds an -mfix-sb1 flag to the MIPS back-end, which works
around certain early errata in early revision 2 SB-1 CPUs.

The comments in mips.md describe the errata fairly well, and the fixes
are straightforward.  ("Hey, if i could implement them..."  8-)

I've checked this on mipsisa64-elf against the simulator, C only (w/
sources as of a day or two ago).  No regressions.

I also created some test code to test all of the individual ops that
were affected.  No change w/o the new flag, changed as expected with
the new flag.

(One thing to note: I was never able to get the compiler -- modified
or unmodified -- to generate recip or rsqrt.  It looks like mips
const_float_1_operand is always returning 0 as a result of the first
'if' statement.  I've not debugged it.)



chris
--
2003-08-21  Chris Demetriou  <cgd@broadcom.com>

	* config/mips/mips.h (MASK_FIX_SB1, TARGET_FIX_SB1): New defines.
	(TARGET_SWITCHES): Add -mfix-sb1 and -mno-fix-sb1.
	* config/mips/mips.md (divdf3, divsf3, sqrtdf2, sqrtsf2): Change
	to be define_expands, and make output templates empty.
	(divdf3_internal, divdf3_sb1_rev2, divsf3_internal)
	(divsf3_sb1_rev2, sqrtdf2_internal, sqrtdf2_sb1_rev2)
	(sqrtsf2_internal, sqrtsf2_sb1_rev2): New define_insns.
	(define_insns for recip.[ds] and rsqrt.[ds]): Add new variants
	to handle TARGET_FIX_SB1.
	* doc/invoke.texi: Document MIPS -mfix-sb1 and -mno-fix-sb1.

Index: config/mips/mips.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/mips.h,v
retrieving revision 1.284
diff -u -p -r1.284 mips.h
--- config/mips/mips.h	12 Aug 2003 06:44:54 -0000	1.284
+++ config/mips/mips.h	21 Aug 2003 20:18:49 -0000
@@ -170,6 +170,7 @@ extern const struct mips_cpu_info *mips_
 #define MASK_UNINIT_CONST_IN_RODATA \
 			   0x00800000	/* Store uninitialized
 					   consts in rodata */
+#define MASK_FIX_SB1       0x01000000   /* Work around SB-1 errata. */
 
 					/* Debug switches, not documented */
 #define MASK_DEBUG	0		/* unused */
@@ -255,6 +256,7 @@ extern const struct mips_cpu_info *mips_
 
 #define TARGET_BRANCHLIKELY	(target_flags & MASK_BRANCHLIKELY)
 
+#define TARGET_FIX_SB1		(target_flags & MASK_FIX_SB1)
 
 /* True if we should use NewABI-style relocation operators for
    symbolic addresses.  This is never true for mips16 code,
@@ -580,6 +582,10 @@ extern const struct mips_cpu_info *mips_
      N_("Work around early 4300 hardware bug")},			\
   {"no-fix4300",         -MASK_4300_MUL_FIX,				\
      N_("Don't work around early 4300 hardware bug")},			\
+  {"fix-sb1",             MASK_FIX_SB1,					\
+     N_("Work around errata for early SB-1 revision 2 cores")},		\
+  {"no-fix-sb1",         -MASK_FIX_SB1,					\
+     N_("Don't work around errata for early SB-1 revision 2 cores")},	\
   {"check-zero-division",-MASK_NO_CHECK_ZERO_DIV,			\
      N_("Trap on integer divide by zero")},				\
   {"no-check-zero-division", MASK_NO_CHECK_ZERO_DIV,			\
Index: config/mips/mips.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/mips.md,v
retrieving revision 1.191
diff -u -p -r1.191 mips.md
--- config/mips/mips.md	19 Aug 2003 18:09:06 -0000	1.191
+++ config/mips/mips.md	21 Aug 2003 20:18:50 -0000
@@ -2365,42 +2365,119 @@
 ;;  ....................
 ;;
 
-(define_insn "divdf3"
+(define_expand "divdf3"
   [(set (match_operand:DF 0 "register_operand" "=f")
 	(div:DF (match_operand:DF 1 "register_operand" "f")
 		(match_operand:DF 2 "register_operand" "f")))]
   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT"
+  "")
+
+(define_insn "divdf3_internal"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(div:DF (match_operand:DF 1 "register_operand" "f")
+		(match_operand:DF 2 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_FIX_SB1"
   "div.d\t%0,%1,%2"
   [(set_attr "type"	"fdiv")
    (set_attr "mode"	"DF")])
 
-(define_insn "divsf3"
+;; Work around early SB-1 rev2 core F1 erratum.
+;;
+;; If an mfc1 or dmfc1 happens to access the floating point register
+;; file at the same time a long latency operation (div, sqrt, recip,
+;; rsqrt) iterates an intermediate result back through the floating
+;; point register file bypass, then instead of returning the correct
+;; register value the mfc1 or dmfc1 operation returns the intermediate
+;; result of the long latency operation.
+;;
+;; The workaround is to insert an unconditional 'mov' from/to the
+;; long latency op destination register.
+(define_insn "divdf3_sb1_rev2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(div:DF (match_operand:DF 1 "register_operand" "f")
+		(match_operand:DF 2 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FIX_SB1"
+  "div.d\t%0,%1,%2\;mov.d\t%0,%0"
+  [(set_attr "type"	"fdiv")
+   (set_attr "mode"	"DF")
+   (set_attr "length"	"8")])
+
+;; Work around early SB-1 rev2 core F2 erratum.
+;;
+;; In certain cases, div.s and div.ps may have a rounding error
+;; and/or wrong inexact flag.
+;;
+;; Therefore, we only allow div.s if not working around SB-1 rev2
+;; errata, or if working around those errata and a slight loss of
+;; precision is OK (i.e., flag_unsafe_math_optimizations is set).
+(define_expand "divsf3"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(div:SF (match_operand:SF 1 "register_operand" "f")
+		(match_operand:SF 2 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && (!TARGET_FIX_SB1 || flag_unsafe_math_optimizations)"
+  "")
+
+(define_insn "divsf3_internal"
   [(set (match_operand:SF 0 "register_operand" "=f")
 	(div:SF (match_operand:SF 1 "register_operand" "f")
 		(match_operand:SF 2 "register_operand" "f")))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT && !TARGET_FIX_SB1"
   "div.s\t%0,%1,%2"
   [(set_attr "type"	"fdiv")
    (set_attr "mode"	"SF")])
 
+;; Work around early SB-1 rev2 core F1 erratum (see "divdf3_sb1_rev2" comment).
+;; Work around early SB-1 rev2 core F2 erratum (see "divsf3" comment).
+(define_insn "divsf3_sb1_rev2"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(div:SF (match_operand:SF 1 "register_operand" "f")
+		(match_operand:SF 2 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
+  "div.s\t%0,%1,%2\;mov.s\t%0,%0"
+  [(set_attr "type"	"fdiv")
+   (set_attr "mode"	"SF")
+   (set_attr "length"	"8")])
+
 (define_insn ""
   [(set (match_operand:DF 0 "register_operand" "=f")
 	(div:DF (match_operand:DF 1 "const_float_1_operand" "")
 		(match_operand:DF 2 "register_operand" "f")))]
-  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && flag_unsafe_math_optimizations"
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
   "recip.d\t%0,%2"
   [(set_attr "type"	"fdiv")
    (set_attr "mode"	"DF")])
 
+;; Work around early SB-1 rev2 core F1 erratum (see "divdf3_sb1_rev2" comment).
+(define_insn ""
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(div:DF (match_operand:DF 1 "const_float_1_operand" "")
+		(match_operand:DF 2 "register_operand" "f")))]
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
+  "recip.d\t%0,%2\;mov.d\t%0,%0"
+  [(set_attr "type"	"fdiv")
+   (set_attr "mode"	"DF")
+   (set_attr "length"	"8")])
+
 (define_insn ""
   [(set (match_operand:SF 0 "register_operand" "=f")
 	(div:SF (match_operand:SF 1 "const_float_1_operand" "")
 		(match_operand:SF 2 "register_operand" "f")))]
-  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && flag_unsafe_math_optimizations"
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && !TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
   "recip.s\t%0,%2"
   [(set_attr "type"	"fdiv")
    (set_attr "mode"	"SF")])
 
+;; Work around early SB-1 rev2 core F1 erratum (see "divdf3_sb1_rev2" comment).
+(define_insn ""
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(div:SF (match_operand:SF 1 "const_float_1_operand" "")
+		(match_operand:SF 2 "register_operand" "f")))]
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
+  "recip.s\t%0,%2\;mov.s\t%0,%0"
+  [(set_attr "type"	"fdiv")
+   (set_attr "mode"	"SF")
+   (set_attr "length"	"8")])
+
 (define_insn "divmodsi4"
   [(set (match_operand:SI 0 "register_operand" "=l")
 	(div:SI (match_operand:SI 1 "register_operand" "d")
@@ -2456,39 +2533,89 @@
 ;;
 ;;  ....................
 
-(define_insn "sqrtdf2"
+(define_expand "sqrtdf2"
   [(set (match_operand:DF 0 "register_operand" "=f")
 	(sqrt:DF (match_operand:DF 1 "register_operand" "f")))]
   "TARGET_HARD_FLOAT && HAVE_SQRT_P() && TARGET_DOUBLE_FLOAT"
+  "")
+
+(define_insn "sqrtdf2_internal"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(sqrt:DF (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && HAVE_SQRT_P() && TARGET_DOUBLE_FLOAT && !TARGET_FIX_SB1"
   "sqrt.d\t%0,%1"
   [(set_attr "type"	"fsqrt")
    (set_attr "mode"	"DF")])
 
-(define_insn "sqrtsf2"
+(define_insn "sqrtdf2_sb1_rev2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(sqrt:DF (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && HAVE_SQRT_P() && TARGET_DOUBLE_FLOAT && TARGET_FIX_SB1"
+  "sqrt.d\t%0,%1\;mov.d\t%0,%0"
+  [(set_attr "type"	"fsqrt")
+   (set_attr "mode"	"DF")
+   (set_attr "length"	"8")])
+
+(define_expand "sqrtsf2"
   [(set (match_operand:SF 0 "register_operand" "=f")
 	(sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
   "TARGET_HARD_FLOAT && HAVE_SQRT_P()"
+  "")
+
+(define_insn "sqrtsf2_internal"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && HAVE_SQRT_P() && !TARGET_FIX_SB1"
   "sqrt.s\t%0,%1"
   [(set_attr "type"	"fsqrt")
    (set_attr "mode"	"SF")])
 
+(define_insn "sqrtsf2_sb1_rev2"
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
+  "TARGET_HARD_FLOAT && HAVE_SQRT_P() && TARGET_FIX_SB1"
+  "sqrt.s\t%0,%1\;mov.s\t%0,%0"
+  [(set_attr "type"	"fsqrt")
+   (set_attr "mode"	"SF")
+   (set_attr "length"	"8")])
+
 (define_insn ""
   [(set (match_operand:DF 0 "register_operand" "=f")
 	(div:DF (match_operand:DF 1 "const_float_1_operand" "")
 		(sqrt:DF (match_operand:DF 2 "register_operand" "f"))))]
-  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && flag_unsafe_math_optimizations"
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
   "rsqrt.d\t%0,%2"
   [(set_attr "type"	"frsqrt")
    (set_attr "mode"	"DF")])
 
 (define_insn ""
+  [(set (match_operand:DF 0 "register_operand" "=f")
+	(div:DF (match_operand:DF 1 "const_float_1_operand" "")
+		(sqrt:DF (match_operand:DF 2 "register_operand" "f"))))]
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
+  "rsqrt.d\t%0,%2\;mov.d\t%0,%0"
+  [(set_attr "type"	"frsqrt")
+   (set_attr "mode"	"DF")
+   (set_attr "length"	"8")])
+
+(define_insn ""
   [(set (match_operand:SF 0 "register_operand" "=f")
 	(div:SF (match_operand:SF 1 "const_float_1_operand" "")
 		(sqrt:SF (match_operand:SF 2 "register_operand" "f"))))]
-  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && flag_unsafe_math_optimizations"
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && !TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
   "rsqrt.s\t%0,%2"
   [(set_attr "type"	"frsqrt")
    (set_attr "mode"	"SF")])
+
+(define_insn ""
+  [(set (match_operand:SF 0 "register_operand" "=f")
+	(div:SF (match_operand:SF 1 "const_float_1_operand" "")
+		(sqrt:SF (match_operand:SF 2 "register_operand" "f"))))]
+  "ISA_HAS_FP4 && TARGET_HARD_FLOAT && TARGET_FIX_SB1 && flag_unsafe_math_optimizations"
+  "rsqrt.s\t%0,%2\;mov.s\t%0,%0"
+  [(set_attr "type"	"frsqrt")
+   (set_attr "mode"	"SF")
+   (set_attr "length"	"8")])
 
 ;;
 ;;  ....................
Index: doc/invoke.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/invoke.texi,v
retrieving revision 1.329
diff -u -p -r1.329 invoke.texi
--- doc/invoke.texi	19 Aug 2003 20:29:00 -0000	1.329
+++ doc/invoke.texi	21 Aug 2003 20:18:51 -0000
@@ -479,7 +479,8 @@ in the following sections.
 -m4650  -msingle-float  -mmad @gol
 -EL  -EB  -G @var{num}  -nocpp @gol
 -mabi=32  -mabi=n32  -mabi=64  -mabi=eabi  -mabi-fake-default @gol
--mfix7000  -mno-crt0 -mflush-func=@var{func} -mno-flush-func @gol
+-mfix7000  -mfix-sb1  -mno-fix-sb1 @gol
+-mno-crt0 -mflush-func=@var{func} -mno-flush-func @gol
 -mbranch-likely -mno-branch-likely}
 
 @emph{i386 and x86-64 Options}
@@ -8167,6 +8168,13 @@ assembler files (with a @samp{.s} suffix
 Pass an option to gas which will cause nops to be inserted if
 the read of the destination register of an mfhi or mflo instruction
 occurs in the following two instructions.
+
+@item -mfix-sb1
+@itemx -mno-fix-sb1
+@opindex mfix-sb1
+Work around certain SB-1 CPU core errata.
+(This flag currently works around the SB-1 revision 2
+``F1'' and ``F2'' floating point errata.)
 
 @item -no-crt0
 @opindex no-crt0
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]