This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v4] Add support for sparc fused compare-and-branch.


This integrates the Solaris2 work from Eric Botcazou, and has
some changes based upon feedback from Richard Henderson.

The bootstrap comparison failure no longer happens, and this is fully
regstrapped on sparc-linux-gnu w/--with-cpu=niagara4, and I also did a
quick bootstrap check using --with-cpu=niagara3.

Eric, any objections to committing this?

Thanks!

gcc/

2012-11-12  David S. Miller  <davem@davemloft.net>

	* configure.ac: Add check for assembler SPARC4 instruction
	support.
	* configure: Rebuild.
	* config.in: Add HAVE_AS_SPARC4 section.
	* config/sparc/sparc.opt (mcbcond): New option.
	* doc/invoke.texi: Document it.
	* config/sparc/constraints.md: New constraint 'A' for 5-bit signed
	immediates.
	* doc/md.texi: Document it.
	* config/sparc/predicates.md (arith5_operand): New predicate.
	* config/sparc/sparc.c (dump_target_flag_bits): Handle MASK_CBCOND.
	(sparc_option_override): Likewise.
	(emit_cbcond_insn): New function.
	(emit_conditional_branch_insn): Call it.
	(emit_cbcond_nop): New function.
	(output_ubranch): Use cbcond, remove label arg.
	(output_cbcond): New function.
	* config/sparc/sparc-protos.h (output_ubranch): Update.
	(output_cbcond): Declare it.
	(emit_cbcond_nop): Likewise.
	* config/sparc/sparc.md (type attribute): New types 'cbcond'
	and 'uncond_cbcond'.
	(emit_cbcond_nop): New attribute.
	(length attribute): Handle cbcond and uncond_cbcond.
	(in_call_delay attribute): Reject cbcond and uncond_cbcond.
	(in_branch_delay attribute): Likewise.
	(in_uncond_branch_delay attribute): Likewise.
	(in_annul_branch_delay attribute): Likewise.
	(*cbcond_sp32, *cbcond_sp64): New insn patterns.
	(jump): Rewrite into an expander.
	(*jump_ubranch, *jump_cbcond): New patterns.
	* config/sparc/niagara4.md: Match 'cbcond' in 'n4_cti'.
	* config/sparc/sparc.h (AS_NIAGARA4_FLAG): New macro, use it
	when target default is niagara4.
	(SPARC_SIMM5_P): Define.
	* config/sparc/sol2.h (AS_SPARC64_FLAG): Adjust.
	(AS_SPARC32_FLAG): Define.
	(ASM_CPU32_DEFAULT_SPEC, ASM_CPU64_DEFAULT_SPEC): Use
	AS_NIAGARA4_FLAG as needed.
---
 gcc/config.in                   |   6 +
 gcc/config/sparc/constraints.md |   7 +-
 gcc/config/sparc/niagara4.md    |   6 +-
 gcc/config/sparc/predicates.md  |   8 ++
 gcc/config/sparc/sol2.h         |  95 +++++++++------
 gcc/config/sparc/sparc-protos.h |   4 +-
 gcc/config/sparc/sparc.c        | 249 ++++++++++++++++++++++++++++++++++++++--
 gcc/config/sparc/sparc.h        |  13 ++-
 gcc/config/sparc/sparc.md       |  81 +++++++++++--
 gcc/config/sparc/sparc.opt      |   4 +
 gcc/configure                   |  42 +++++++
 gcc/configure.ac                |  18 +++
 gcc/doc/invoke.texi             |  11 ++
 gcc/doc/md.texi                 |   3 +
 14 files changed, 485 insertions(+), 62 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index ee2a4d8..7038906 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -266,6 +266,12 @@
 #endif
 
 
+/* Define if your assembler supports SPARC4 instructions. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_SPARC4
+#endif
+
+
 /* Define if your assembler supports fprnd. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_FPRND
diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
index 8963a31..525e3ac 100644
--- a/gcc/config/sparc/constraints.md
+++ b/gcc/config/sparc/constraints.md
@@ -18,7 +18,7 @@
 ;; <http://www.gnu.org/licenses/>.
 
 ;;; Unused letters:
-;;;    AB
+;;;     B
 ;;;    a        jkl    q  tuv xyz
 
 
@@ -58,6 +58,11 @@
 
 ;; Integer constant constraints
 
+(define_constraint "A"
+ "Signed 5-bit integer constant"
+ (and (match_code "const_int")
+      (match_test "SPARC_SIMM5_P (ival)")))
+
 (define_constraint "H"
  "Valid operand of double arithmetic operation"
  (and (match_code "const_double")
diff --git a/gcc/config/sparc/niagara4.md b/gcc/config/sparc/niagara4.md
index 272c8ff..26f2391 100644
--- a/gcc/config/sparc/niagara4.md
+++ b/gcc/config/sparc/niagara4.md
@@ -54,10 +54,10 @@
     (eq_attr "type" "store,fpstore"))
   "(n4_slot0 | n4_slot2) + n4_load_store")
 
-(define_insn_reservation "n4_cti" 2
+(define_insn_reservation "n4_cti" 1
   (and (eq_attr "cpu" "niagara4")
-    (eq_attr "type" "branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
-  "n4_slot1, nothing")
+    (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
+  "n4_slot1")
 
 (define_insn_reservation "n4_fp" 11
   (and (eq_attr "cpu" "niagara4")
diff --git a/gcc/config/sparc/predicates.md b/gcc/config/sparc/predicates.md
index 326524b..b64e109 100644
--- a/gcc/config/sparc/predicates.md
+++ b/gcc/config/sparc/predicates.md
@@ -391,6 +391,14 @@
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "uns_small_int_operand")))
 
+;; Return true if OP is a register, or is a CONST_INT that can fit in a
+;; signed 5-bit immediate field.  This is an acceptable second operand for
+;; the cbcond instructions.
+(define_predicate "arith5_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_code "const_int")
+            (match_test "SPARC_SIMM5_P (INTVAL (op))"))))
+
 
 ;; Predicates for miscellaneous instructions.
 
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h
index 90dfd89..25ff347 100644
--- a/gcc/config/sparc/sol2.h
+++ b/gcc/config/sparc/sol2.h
@@ -54,19 +54,56 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Supposedly the same as vanilla sparc svr4, except for the stuff below: */
 
-/* This is here rather than in sparc.h because it's not known what
-   other assemblers will accept.  */
+/* If the assembler supports -xarch=sparc4, we switch to the explicit
+   word size selection mechanism available both in GNU as and Sun as,
+   for the Niagara4 and above configurations.  */
+#ifdef HAVE_AS_SPARC4
+
+#define AS_SPARC32_FLAG ""
+#define AS_SPARC64_FLAG ""
 
 #ifndef USE_GAS
-#define AS_SPARC64_FLAG	"-xarch=v9"
-#else
-#define AS_SPARC64_FLAG	"-TSO -64 -Av9"
+#undef ASM_ARCH32_SPEC
+#define ASM_ARCH32_SPEC "-m32"
+#undef ASM_ARCH64_SPEC
+#define ASM_ARCH64_SPEC "-m64"
 #endif
 
+/* Both Sun as and GNU as understand -K PIC.  */
+#undef ASM_SPEC
+#define ASM_SPEC ASM_SPEC_BASE " %(asm_arch)" ASM_PIC_SPEC
+
+#else /* HAVE_AS_SPARC4 */
+
+#define AS_SPARC32_FLAG "-xarch=v8plus"
+#define AS_SPARC64_FLAG "-xarch=v9"
+
+#undef AS_NIAGARA4_FLAG
+#define AS_NIAGARA4_FLAG AS_NIAGARA3_FLAG
+
+#undef ASM_ARCH32_SPEC
+#define ASM_ARCH32_SPEC ""
+
+#undef ASM_ARCH64_SPEC
+#define ASM_ARCH64_SPEC ""
+
+#undef ASM_ARCH_DEFAULT_SPEC
+#define ASM_ARCH_DEFAULT_SPEC ""
+
+#undef ASM_ARCH_SPEC
+#define ASM_ARCH_SPEC ""
+
+/* Both Sun as and GNU as understand -K PIC.  */
+#undef ASM_SPEC
+#define ASM_SPEC ASM_SPEC_BASE ASM_PIC_SPEC
+
+#endif /* HAVE_AS_SPARC4 */
+
+
 #undef ASM_CPU32_DEFAULT_SPEC
 #define ASM_CPU32_DEFAULT_SPEC	""
 #undef ASM_CPU64_DEFAULT_SPEC
-#define ASM_CPU64_DEFAULT_SPEC	AS_SPARC64_FLAG
+#define ASM_CPU64_DEFAULT_SPEC	"-xarch=v9"
 
 #if TARGET_CPU_DEFAULT == TARGET_CPU_v9
 #undef CPP_CPU64_DEFAULT_SPEC
@@ -83,7 +120,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef ASM_CPU32_DEFAULT_SPEC
 #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusa"
 #undef ASM_CPU64_DEFAULT_SPEC
-#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "a"
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9a"
 #undef ASM_CPU_DEFAULT_SPEC
 #define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
 #endif
@@ -94,7 +131,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef ASM_CPU32_DEFAULT_SPEC
 #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
 #undef ASM_CPU64_DEFAULT_SPEC
-#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b"
 #undef ASM_CPU_DEFAULT_SPEC
 #define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
 #endif
@@ -105,7 +142,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef ASM_CPU32_DEFAULT_SPEC
 #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
 #undef ASM_CPU64_DEFAULT_SPEC
-#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b"
 #undef ASM_CPU_DEFAULT_SPEC
 #define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
 #endif
@@ -116,7 +153,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef ASM_CPU32_DEFAULT_SPEC
 #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
 #undef ASM_CPU64_DEFAULT_SPEC
-#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9b"
 #undef ASM_CPU_DEFAULT_SPEC
 #define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
 #endif
@@ -127,7 +164,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef ASM_CPU32_DEFAULT_SPEC
 #define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus" AS_NIAGARA3_FLAG
 #undef ASM_CPU64_DEFAULT_SPEC
-#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG AS_NIAGARA3_FLAG
+#define ASM_CPU64_DEFAULT_SPEC "-xarch=v9" AS_NIAGARA3_FLAG
 #undef ASM_CPU_DEFAULT_SPEC
 #define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
 #endif
@@ -136,17 +173,13 @@ along with GCC; see the file COPYING3.  If not see
 #undef CPP_CPU64_DEFAULT_SPEC
 #define CPP_CPU64_DEFAULT_SPEC ""
 #undef ASM_CPU32_DEFAULT_SPEC
-#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plus" AS_NIAGARA3_FLAG
+#define ASM_CPU32_DEFAULT_SPEC AS_SPARC32_FLAG AS_NIAGARA4_FLAG
 #undef ASM_CPU64_DEFAULT_SPEC
-#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG AS_NIAGARA3_FLAG
+#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG AS_NIAGARA4_FLAG
 #undef ASM_CPU_DEFAULT_SPEC
 #define ASM_CPU_DEFAULT_SPEC ASM_CPU32_DEFAULT_SPEC
 #endif
 
-/* Both Sun as and GNU as understand -K PIC.  */
-#undef ASM_SPEC
-#define ASM_SPEC ASM_SPEC_BASE ASM_PIC_SPEC
-
 #undef CPP_CPU_SPEC
 #define CPP_CPU_SPEC "\
 %{mcpu=sparclet|mcpu=tsc701:-D__sparclet__} \
@@ -235,29 +268,17 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #undef ASM_CPU_SPEC
 #define ASM_CPU_SPEC "\
-%{mcpu=v9:" DEF_ARCH32_SPEC("-xarch=v8plus") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "} \
-%{mcpu=ultrasparc:" DEF_ARCH32_SPEC("-xarch=v8plusa") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "a") "} \
-%{mcpu=ultrasparc3:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
-%{mcpu=niagara:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
-%{mcpu=niagara2:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
-%{mcpu=niagara3:" DEF_ARCH32_SPEC("-xarch=v8plus" AS_NIAGARA3_FLAG) DEF_ARCH64_SPEC(AS_SPARC64_FLAG AS_NIAGARA3_FLAG) "} \
-%{mcpu=niagara4:" DEF_ARCH32_SPEC("-xarch=v8plus" AS_NIAGARA3_FLAG) DEF_ARCH64_SPEC(AS_SPARC64_FLAG AS_NIAGARA3_FLAG) "} \
-%{!mcpu=niagara4:%{!mcpu=niagara3:%{!mcpu=niagara2:%{!mcpu=niagara:%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "}}}}}}}} \
+%{mcpu=v9:" DEF_ARCH32_SPEC("-xarch=v8plus") DEF_ARCH64_SPEC("-xarch=v9") "} \
+%{mcpu=ultrasparc:" DEF_ARCH32_SPEC("-xarch=v8plusa") DEF_ARCH64_SPEC("-xarch=v9a") "} \
+%{mcpu=ultrasparc3:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC("-xarch=v9b") "} \
+%{mcpu=niagara:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC("-xarch=v9b") "} \
+%{mcpu=niagara2:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC("-xarch=v9b") "} \
+%{mcpu=niagara3:" DEF_ARCH32_SPEC("-xarch=v8plus" AS_NIAGARA3_FLAG) DEF_ARCH64_SPEC("-xarch=v9" AS_NIAGARA3_FLAG) "} \
+%{mcpu=niagara4:" DEF_ARCH32_SPEC(AS_SPARC32_FLAG AS_NIAGARA4_FLAG) DEF_ARCH64_SPEC(AS_SPARC64_FLAG AS_NIAGARA4_FLAG) "} \
+%{!mcpu=niagara4:%{!mcpu=niagara3:%{!mcpu=niagara2:%{!mcpu=niagara:%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC("-xarch=v9") "}}}}}}}} \
 %{!mcpu*:%(asm_cpu_default)} \
 "
 
-#undef ASM_ARCH32_SPEC
-#define ASM_ARCH32_SPEC ""
-
-#undef ASM_ARCH64_SPEC
-#define ASM_ARCH64_SPEC ""
-
-#undef ASM_ARCH_DEFAULT_SPEC
-#define ASM_ARCH_DEFAULT_SPEC ""
-
-#undef ASM_ARCH_SPEC
-#define ASM_ARCH_SPEC ""
-
 #ifdef USE_GLD
 /* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly
    follow the Solaris 2 ABI.  Prefer them if present.  */
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
index 97f6233..d5b2b1f 100644
--- a/gcc/config/sparc/sparc-protos.h
+++ b/gcc/config/sparc/sparc-protos.h
@@ -71,7 +71,7 @@ extern void sparc_emit_set_symbolic_const64 (rtx, rtx, rtx);
 extern int sparc_splitdi_legitimate (rtx, rtx);
 extern int sparc_split_regreg_legitimate (rtx, rtx);
 extern int sparc_absnegfloat_split_legitimate (rtx, rtx);
-extern const char *output_ubranch (rtx, int, rtx);
+extern const char *output_ubranch (rtx, rtx);
 extern const char *output_cbranch (rtx, rtx, int, int, int, rtx);
 extern const char *output_return (rtx);
 extern const char *output_sibcall (rtx, rtx);
@@ -79,10 +79,12 @@ extern const char *output_v8plus_shift (rtx, rtx *, const char *);
 extern const char *output_v8plus_mult (rtx, rtx *, const char *);
 extern const char *output_v9branch (rtx, rtx, int, int, int, int, rtx);
 extern const char *output_probe_stack_range (rtx, rtx);
+extern const char *output_cbcond (rtx, rtx, rtx);
 extern bool emit_scc_insn (rtx []);
 extern void emit_conditional_branch_insn (rtx []);
 extern int mems_ok_for_ldd_peep (rtx, rtx, rtx);
 extern int empty_delay_slot (rtx);
+extern int emit_cbcond_nop (rtx);
 extern int eligible_for_return_delay (rtx);
 extern int eligible_for_sibcall_delay (rtx);
 extern int tls_call_delay (rtx);
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 272632e..002e238 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -840,6 +840,8 @@ dump_target_flag_bits (const int flags)
     fprintf (stderr, "VIS2 ");
   if (flags & MASK_VIS3)
     fprintf (stderr, "VIS3 ");
+  if (flags & MASK_CBCOND)
+    fprintf (stderr, "CBCOND ");
   if (flags & MASK_DEPRECATED_V8_INSNS)
     fprintf (stderr, "DEPRECATED_V8_INSNS ");
   if (flags & MASK_SPARCLET)
@@ -946,7 +948,7 @@ sparc_option_override (void)
       MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
     /* UltraSPARC T4 */
     { "niagara4",	MASK_ISA,
-      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
+      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
   };
   const struct cpu_table *cpu;
   unsigned int i;
@@ -1073,6 +1075,9 @@ sparc_option_override (void)
 #ifndef HAVE_AS_FMAF_HPC_VIS3
 		   & ~(MASK_FMAF | MASK_VIS3)
 #endif
+#ifndef HAVE_AS_SPARC4
+		   & ~MASK_CBCOND
+#endif
 		   );
 
   /* If -mfpu or -mno-fpu was explicitly used, don't override with
@@ -1088,7 +1093,12 @@ sparc_option_override (void)
   if (TARGET_VIS3)
     target_flags |= MASK_VIS2 | MASK_VIS;
 
-  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is disabled.  */
+  /* -mcbcond implies -mvis3, -mvis2 and -mvis */
+  if (TARGET_CBCOND)
+    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
+
+  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
+     disabled.  */
   if (! TARGET_FPU)
     target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
 
@@ -2660,6 +2670,24 @@ emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
 				    pc_rtx)));
 }
 
+/* Emit a conditional jump insn for the UA2011 architecture that
+   branches to LABEL when comparison CODE holds between OP0 and OP1.
+   This exists to take advantage of UA2011 Compare and Branch insns.  */
+
+static void
+emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
+{
+  rtx if_then_else;
+
+  if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
+				       gen_rtx_fmt_ee(code, GET_MODE(op0),
+						      op0, op1),
+				       gen_rtx_LABEL_REF (VOIDmode, label),
+				       pc_rtx);
+
+  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
+}
+
 void
 emit_conditional_branch_insn (rtx operands[])
 {
@@ -2674,6 +2702,20 @@ emit_conditional_branch_insn (rtx operands[])
       operands[2] = XEXP (operands[0], 1);
     }
 
+  /* If we can tell early on that the comparison is against a constant
+     that won't fit in the 5-bit signed immediate field of a cbcond,
+     use one of the other v9 conditional branch sequences.  */
+  if (TARGET_CBCOND
+      && GET_CODE (operands[1]) == REG
+      && (GET_MODE (operands[1]) == SImode
+	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
+      && (GET_CODE (operands[2]) != CONST_INT
+	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
+    {
+      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
+      return;
+    }
+
   if (TARGET_ARCH64 && operands[2] == const0_rtx
       && GET_CODE (operands[1]) == REG
       && GET_MODE (operands[1]) == DImode)
@@ -3014,6 +3056,44 @@ empty_delay_slot (rtx insn)
   return 1;
 }
 
+/* Return nonzero if we should emit a nop after a cbcond instruction.
+   The cbcond instruction does not have a delay slot, however there is
+   a severe performance penalty if a control transfer appears right
+   after a cbcond.  Therefore we emit a nop when we detect this
+   situation.  */
+
+int
+emit_cbcond_nop (rtx insn)
+{
+  rtx next = next_active_insn (insn);
+
+  if (!next)
+    return 1;
+
+  if (GET_CODE (next) == INSN
+      && GET_CODE (PATTERN (next)) == SEQUENCE)
+    next = XVECEXP (PATTERN (next), 0, 0);
+  else if (GET_CODE (next) == CALL_INSN
+	   && GET_CODE (PATTERN (next)) == PARALLEL)
+    {
+      rtx delay = XVECEXP (PATTERN (next), 0, 1);
+
+      if (GET_CODE (delay) == RETURN)
+	{
+	  /* It's a sibling call.  Do not emit the nop if we're going
+	     to emit something other than the jump itself as the first
+	     instruction of the sibcall sequence.  */
+	  if (sparc_leaf_function_p || TARGET_FLAT)
+	    return 0;
+	}
+    }
+
+  if (NONJUMP_INSN_P (next))
+    return 0;
+
+  return 1;
+}
+
 /* Return nonzero if TRIAL can go into the call delay slot.  */
 
 int
@@ -7102,19 +7182,49 @@ sparc_preferred_simd_mode (enum machine_mode mode)
    DEST is the destination insn (i.e. the label), INSN is the source.  */
 
 const char *
-output_ubranch (rtx dest, int label, rtx insn)
+output_ubranch (rtx dest, rtx insn)
 {
   static char string[64];
   bool v9_form = false;
+  int delta;
   char *p;
 
-  if (TARGET_V9 && INSN_ADDRESSES_SET_P ())
+  /* Even if we are trying to use cbcond for this, evaluate
+     whether we can use V9 branches as our backup plan.  */
+
+  delta = 5000000;
+  if (INSN_ADDRESSES_SET_P ())
+    delta = (INSN_ADDRESSES (INSN_UID (dest))
+	     - INSN_ADDRESSES (INSN_UID (insn)));
+
+  /* Leave some instructions for "slop".  */
+  if (TARGET_V9 && delta >= -260000 && delta < 260000)
+    v9_form = true;
+
+  if (TARGET_CBCOND)
     {
-      int delta = (INSN_ADDRESSES (INSN_UID (dest))
-		   - INSN_ADDRESSES (INSN_UID (insn)));
-      /* Leave some instructions for "slop".  */
-      if (delta >= -260000 && delta < 260000)
-	v9_form = true;
+      bool emit_nop = emit_cbcond_nop (insn);
+      bool far = false;
+      const char *rval;
+
+      if (delta < -500 || delta > 500)
+	far = true;
+
+      if (far)
+	{
+	  if (v9_form)
+	    rval = "ba,a,pt\t%%xcc, %l0";
+	  else
+	    rval = "b,a\t%l0";
+	}
+      else
+	{
+	  if (emit_nop)
+	    rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
+	  else
+	    rval = "cwbe\t%%g0, %%g0, %l0";
+	}
+      return rval;
     }
 
   if (v9_form)
@@ -7125,7 +7235,7 @@ output_ubranch (rtx dest, int label, rtx insn)
   p = strchr (string, '\0');
   *p++ = '%';
   *p++ = 'l';
-  *p++ = '0' + label;
+  *p++ = '0';
   *p++ = '%';
   *p++ = '(';
   *p = '\0';
@@ -7604,6 +7714,125 @@ sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode)
   emit_label (donelab);
 }
 
+/* Return the string to output a compare and branch instruction to DEST.
+   DEST is the destination insn (i.e. the label), INSN is the source,
+   and OP is the conditional expression.  */
+
+const char *
+output_cbcond (rtx op, rtx dest, rtx insn)
+{
+  enum machine_mode mode = GET_MODE (XEXP (op, 0));
+  enum rtx_code code = GET_CODE (op);
+  const char *cond_str, *tmpl;
+  int far, emit_nop, len;
+  static char string[64];
+  char size_char;
+
+  /* Compare and Branch is limited to +-2KB.  If it is too far away,
+     change
+
+     cxbne X, Y, .LC30
+
+     to
+
+     cxbe X, Y, .+16
+     nop
+     ba,pt xcc, .LC30
+      nop  */
+
+  len = get_attr_length (insn);
+
+  far = len == 4;
+  emit_nop = len == 2;
+
+  if (far)
+    code = reverse_condition (code);
+
+  size_char = ((mode == SImode) ? 'w' : 'x');
+
+  switch (code)
+    {
+    case NE:
+      cond_str = "ne";
+      break;
+
+    case EQ:
+      cond_str = "e";
+      break;
+
+    case GE:
+      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
+	cond_str = "pos";
+      else
+	cond_str = "ge";
+      break;
+
+    case GT:
+      cond_str = "g";
+      break;
+
+    case LE:
+      cond_str = "le";
+      break;
+
+    case LT:
+      if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
+	cond_str = "neg";
+      else
+	cond_str = "l";
+      break;
+
+    case GEU:
+      cond_str = "cc";
+      break;
+
+    case GTU:
+      cond_str = "gu";
+      break;
+
+    case LEU:
+      cond_str = "leu";
+      break;
+
+    case LTU:
+      cond_str = "cs";
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (far)
+    {
+      int veryfar = 1, delta;
+
+      if (INSN_ADDRESSES_SET_P ())
+	{
+	  delta = (INSN_ADDRESSES (INSN_UID (dest))
+		   - INSN_ADDRESSES (INSN_UID (insn)));
+	  /* Leave some instructions for "slop".  */
+	  if (delta >= -260000 && delta < 260000)
+	    veryfar = 0;
+	}
+
+      if (veryfar)
+	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
+      else
+	tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
+    }
+  else
+    {
+      if (emit_nop)
+	tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
+      else
+	tmpl = "c%cb%s\t%%1, %%2, %%3";
+    }
+
+  snprintf (string, sizeof(string), tmpl, size_char, cond_str);
+
+  return string;
+}
+
 /* Return the string to output a conditional branch to LABEL, testing
    register REG.  LABEL is the operand number of the label; REG is the
    operand number of the reg.  OP is the conditional expression.  The mode
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 52e0828..57c5eac 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -195,7 +195,7 @@ extern enum cmodel sparc_cmodel;
 #endif
 #if TARGET_CPU_DEFAULT == TARGET_CPU_niagara4
 #define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
-#define ASM_CPU64_DEFAULT_SPEC "-Av9" AS_NIAGARA3_FLAG
+#define ASM_CPU64_DEFAULT_SPEC AS_NIAGARA4_FLAG
 #endif
 
 #else
@@ -337,7 +337,7 @@ extern enum cmodel sparc_cmodel;
 %{mcpu=niagara:%{!mv8plus:-Av9b}} \
 %{mcpu=niagara2:%{!mv8plus:-Av9b}} \
 %{mcpu=niagara3:%{!mv8plus:-Av9" AS_NIAGARA3_FLAG "}} \
-%{mcpu=niagara4:%{!mv8plus:-Av9" AS_NIAGARA3_FLAG "}} \
+%{mcpu=niagara4:%{!mv8plus:" AS_NIAGARA4_FLAG "}} \
 %{!mcpu*:%(asm_cpu_default)} \
 "
 
@@ -1006,7 +1006,8 @@ extern char leaf_reg_remap[];
 /* Local macro to handle the two v9 classes of FP regs.  */
 #define FP_REG_CLASS_P(CLASS) ((CLASS) == FP_REGS || (CLASS) == EXTRA_FP_REGS)
 
-/* Predicates for 10-bit, 11-bit and 13-bit signed constants.  */
+/* Predicates for 5-bit, 10-bit, 11-bit and 13-bit signed constants.  */
+#define SPARC_SIMM5_P(X)  ((unsigned HOST_WIDE_INT) (X) + 0x10 < 0x20)
 #define SPARC_SIMM10_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x200 < 0x400)
 #define SPARC_SIMM11_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x400 < 0x800)
 #define SPARC_SIMM13_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x1000 < 0x2000)
@@ -1746,6 +1747,12 @@ extern int sparc_indent_opcode;
 #define AS_NIAGARA3_FLAG "b"
 #endif
 
+#ifdef HAVE_AS_SPARC4
+#define AS_NIAGARA4_FLAG "-xarch=sparc4"
+#else
+#define AS_NIAGARA4_FLAG "-Av9" AS_NIAGARA3_FLAG
+#endif
+
 /* We use gcc _mcount for profiling.  */
 #define NO_PROFILE_COUNTERS 0
 
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index f604f46..15ecdb4 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -257,6 +257,7 @@
   "ialu,compare,shift,
    load,sload,store,
    uncond_branch,branch,call,sibcall,call_no_delay_slot,return,
+   cbcond,uncond_cbcond,
    imul,idiv,
    fpload,fpstore,
    fp,fpmove,
@@ -275,6 +276,12 @@
   (symbol_ref "(empty_delay_slot (insn)
 		? EMPTY_DELAY_SLOT_TRUE : EMPTY_DELAY_SLOT_FALSE)"))
 
+;; True if we should emit a nop after a cbcond instruction, so that a
+;; control transfer does not directly follow the cbcond.
+(define_attr "emit_cbcond_nop" "false,true"
+  (symbol_ref "(emit_cbcond_nop (insn)
+                ? EMIT_CBCOND_NOP_TRUE : EMIT_CBCOND_NOP_FALSE)"))
+
 (define_attr "branch_type" "none,icc,fcc,reg"
   (const_string "none"))
 
@@ -377,6 +384,30 @@
 	       (if_then_else (eq_attr "empty_delay_slot" "true")
 		 (const_int 4)
 		 (const_int 3))))
+         (eq_attr "type" "cbcond")
+	   (if_then_else (lt (pc) (match_dup 3))
+	     (if_then_else (lt (minus (match_dup 3) (pc)) (const_int 500))
+               (if_then_else (eq_attr "emit_cbcond_nop" "true")
+                 (const_int 2)
+                 (const_int 1))
+               (const_int 4))
+	     (if_then_else (lt (minus (pc) (match_dup 3)) (const_int 500))
+               (if_then_else (eq_attr "emit_cbcond_nop" "true")
+                 (const_int 2)
+                 (const_int 1))
+               (const_int 4)))
+         (eq_attr "type" "uncond_cbcond")
+	   (if_then_else (lt (pc) (match_dup 0))
+	     (if_then_else (lt (minus (match_dup 0) (pc)) (const_int 500))
+               (if_then_else (eq_attr "emit_cbcond_nop" "true")
+                 (const_int 2)
+                 (const_int 1))
+               (const_int 1))
+	     (if_then_else (lt (minus (pc) (match_dup 0)) (const_int 500))
+               (if_then_else (eq_attr "emit_cbcond_nop" "true")
+                 (const_int 2)
+                 (const_int 1))
+               (const_int 1)))
 	 ] (const_int 1)))
 
 ;; FP precision.
@@ -397,7 +428,7 @@
 		? TLS_CALL_DELAY_TRUE : TLS_CALL_DELAY_FALSE)"))
 
 (define_attr "in_call_delay" "false,true"
-  (cond [(eq_attr "type" "uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+  (cond [(eq_attr "type" "uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
 		(const_string "false")
 	 (eq_attr "type" "load,fpload,store,fpstore")
 		(if_then_else (eq_attr "length" "1")
@@ -431,19 +462,19 @@
 ;; because it prevents us from moving back the final store of inner loops.
 
 (define_attr "in_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
 		     (eq_attr "length" "1"))
 		(const_string "true")
 		(const_string "false")))
 
 (define_attr "in_uncond_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
 		     (eq_attr "length" "1"))
 		(const_string "true")
 		(const_string "false")))
 
 (define_attr "in_annul_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,call,sibcall,call_no_delay_slot,multi")
+  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi")
 		     (eq_attr "length" "1"))
 		(const_string "true")
 		(const_string "false")))
@@ -1313,6 +1344,32 @@
 ;; SPARC V9-specific jump insns.  None of these are guaranteed to be
 ;; in the architecture.
 
+(define_insn "*cbcond_sp32"
+  [(set (pc)
+        (if_then_else (match_operator 0 "noov_compare_operator"
+                       [(match_operand:SI 1 "register_operand" "r")
+                        (match_operand:SI 2 "arith5_operand" "rA")])
+                      (label_ref (match_operand 3 "" ""))
+                      (pc)))]
+  "TARGET_CBCOND"
+{
+  return output_cbcond (operands[0], operands[3], insn);
+}
+  [(set_attr "type" "cbcond")])
+
+(define_insn "*cbcond_sp64"
+  [(set (pc)
+        (if_then_else (match_operator 0 "noov_compare_operator"
+                       [(match_operand:DI 1 "register_operand" "r")
+                        (match_operand:DI 2 "arith5_operand" "rA")])
+                      (label_ref (match_operand 3 "" ""))
+                      (pc)))]
+  "TARGET_ARCH64 && TARGET_CBCOND"
+{
+  return output_cbcond (operands[0], operands[3], insn);
+}
+  [(set_attr "type" "cbcond")])
+
 ;; There are no 32 bit brreg insns.
 
 ;; XXX
@@ -6076,12 +6133,22 @@
 
 ;; Unconditional and other jump instructions.
 
-(define_insn "jump"
+(define_expand "jump"
   [(set (pc) (label_ref (match_operand 0 "" "")))]
-  ""
-  "* return output_ubranch (operands[0], 0, insn);"
+  "")
+
+(define_insn "*jump_ubranch"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  "! TARGET_CBCOND"
+  "* return output_ubranch (operands[0], insn);"
   [(set_attr "type" "uncond_branch")])
 
+(define_insn "*jump_cbcond"
+  [(set (pc) (label_ref (match_operand 0 "" "")))]
+  "TARGET_CBCOND"
+  "* return output_ubranch (operands[0], insn);"
+  [(set_attr "type" "uncond_cbcond")])
+
 (define_expand "tablejump"
   [(parallel [(set (pc) (match_operand 0 "register_operand" "r"))
 	      (use (label_ref (match_operand 1 "" "")))])]
diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt
index 58ba6b7..241cb07 100644
--- a/gcc/config/sparc/sparc.opt
+++ b/gcc/config/sparc/sparc.opt
@@ -73,6 +73,10 @@ mvis3
 Target Report Mask(VIS3)
 Use UltraSPARC Visual Instruction Set version 3.0 extensions
 
+mcbcond
+Target Report Mask(CBCOND)
+Use UltraSPARC Compare-and-Branch extensions
+
 mfmaf
 Target Report Mask(FMAF)
 Use UltraSPARC Fused Multiply-Add extensions
diff --git a/gcc/configure b/gcc/configure
index 673b908..78ec2b6 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -24110,6 +24110,48 @@ if test $gcc_cv_as_sparc_fmaf = yes; then
 $as_echo "#define HAVE_AS_FMAF_HPC_VIS3 1" >>confdefs.h
 
 fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for SPARC4 instructions" >&5
+$as_echo_n "checking assembler for SPARC4 instructions... " >&6; }
+if test "${gcc_cv_as_sparc_sparc4+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_sparc_sparc4=no
+  if test x$gcc_cv_as != x; then
+    $as_echo '.text
+       .register %g2, #scratch
+       .register %g3, #scratch
+       .align 4
+       cxbe %g2, %g3, 1f
+1:     cwbneg %g2, %g3, 1f
+1:     sha1
+       md5
+       aes_kexpand0 %f4, %f6, %f8
+       des_round %f38, %f40, %f42, %f44
+       camellia_f %f54, %f56, %f58, %f60
+       kasumi_fi_xor %f46, %f48, %f50, %f52' > conftest.s
+    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -xarch=sparc4 -o conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+    then
+	gcc_cv_as_sparc_sparc4=yes
+    else
+      echo "configure: failed program was" >&5
+      cat conftest.s >&5
+    fi
+    rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sparc_sparc4" >&5
+$as_echo "$gcc_cv_as_sparc_sparc4" >&6; }
+if test $gcc_cv_as_sparc_sparc4 = yes; then
+
+$as_echo "#define HAVE_AS_SPARC4 1" >>confdefs.h
+
+fi
     ;;
 
   i[34567]86-*-* | x86_64-*-*)
diff --git a/gcc/configure.ac b/gcc/configure.ac
index e0e12c0..0baf4bc 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -3520,6 +3520,24 @@ foo:
        fnaddd %f10, %f12, %f14],,
       [AC_DEFINE(HAVE_AS_FMAF_HPC_VIS3, 1,
                 [Define if your assembler supports FMAF, HPC, and VIS 3.0 instructions.])])
+
+    gcc_GAS_CHECK_FEATURE([SPARC4 instructions],
+      gcc_cv_as_sparc_sparc4,,
+      [-xarch=sparc4],
+      [.text
+       .register %g2, #scratch
+       .register %g3, #scratch
+       .align 4
+       cxbe %g2, %g3, 1f
+1:     cwbneg %g2, %g3, 1f
+1:     sha1
+       md5
+       aes_kexpand0 %f4, %f6, %f8
+       des_round %f38, %f40, %f42, %f44
+       camellia_f %f54, %f56, %f58, %f60
+       kasumi_fi_xor %f46, %f48, %f50, %f52],,
+      [AC_DEFINE(HAVE_AS_SPARC4, 1,
+                [Define if your assembler supports SPARC4 instructions.])])
     ;;
 
 changequote(,)dnl
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6f03a55..93920e9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -929,6 +929,7 @@ See RS/6000 and PowerPC Options.
 -munaligned-doubles  -mno-unaligned-doubles @gol
 -mv8plus  -mno-v8plus  -mvis  -mno-vis @gol
 -mvis2  -mno-vis2  -mvis3  -mno-vis3 @gol
+-mcbcond -mno-cbcond @gol
 -mfmaf  -mno-fmaf  -mpopc  -mno-popc @gol
 -mfix-at697f}
 
@@ -19148,6 +19149,16 @@ default is @option{-mvis3} when targeting a cpu that supports such
 instructions, such as niagara-3 and later.  Setting @option{-mvis3}
 also sets @option{-mvis2} and @option{-mvis}.
 
+@item -mcbcond
+@itemx -mno-cbcond
+@opindex mcbcond
+@opindex mno-cbcond
+With @option{-mcbcond}, GCC generates code that takes advantage of
+compare-and-branch instructions, as defined in the SPARC Architecture 2011.
+The default is @option{-mcbcond} when targeting a cpu that supports such
+instructions, such as niagara-4 and later.  Setting @option{-mcbcond} also
+sets @option{-mvis3}, @option{-mvis2}, and @option{-mvis}.
+
 @item -mpopc
 @itemx -mno-popc
 @opindex mpopc
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index eb6ba91..396bf43 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3222,6 +3222,9 @@ when the Visual Instruction Set is available.
 @item C
 The constant all-ones, for floating-point.
 
+@item A
+Signed 5-bit constant
+
 @item D
 A vector constant
 
-- 
1.7.12.2.dirty


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]