[Committed] S/390: Support the branch on index instruction

Andreas Krebbel krebbel@linux.vnet.ibm.com
Thu Aug 20 12:23:00 GMT 2009


Hi,

the attached patch adds support for the branch on index instruction to
the S/390 back-end.  The branch on index instruction is able to add a
value of a register to another one, compare that with a third and
branch depending on the result.  So it is basically an ADD, a CMP and
a conditional jump in one instruction.  So far that instruction didn't
buy us very much since, due to instruction grouping, that single
complex instruction isn't much faster than the 3 simple instructions.
Another disadvantage is that an immediate addend and/or an immediate
comparison value has to be loaded into a register first.  These loads
can't be moved out of the loop body since there is no loop-invariant
motion pass after the combine step, which is usually the pass that
introduces that instruction.

But the major advantage is its static prediction - which becomes
important when building z10 optimized code.  The branch on index
instruction is statically predicted as taken whereas a conditional
jump is predicted as not taken.  This only matters when the dynamic
prediction refuses to do its job, which might happen on z10,
especially when dealing with large loop bodies.

With the attached patch the branch on index instruction is used about
70 times within the cc1 executable.  That number would be much higher
if either the instruction supported more comparison operators (only LE
and H, i.e. GT, are supported so far) or if I could make GCC transform
loops so that the pattern fits more often.

Bootstrapped and regtested on s390 and s390x.
Committed to mainline.

Bye,

-Andreas-


2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>

	* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
	"*brx_64bit", "*brx_31bit"): New patterns.
	* config/s390/s390.c ('E'): New output modifier.


Index: gcc/gcc/config/s390/s390.md
===================================================================
--- gcc.orig/gcc/config/s390/s390.md
+++ gcc/gcc/config/s390/s390.md
@@ -7561,6 +7561,180 @@
 ;;  This is all complicated by the fact that since this is a jump insn
 ;;  we must handle our own output reloads.
 
+;; branch on index
+
+; This splitter will be matched by combine and has to add the 2 moves
+; necessary to load the compare and the increment values into a
+; register pair as needed by brxle.
+
+(define_insn_and_split "*brx_stage1_<GPR:mode>"
+  [(set (pc)
+        (if_then_else
+	 (match_operator 6 "s390_brx_operator"
+	    [(plus:GPR (match_operand:GPR 1 "register_operand" "")
+		       (match_operand:GPR 2 "general_operand"  ""))
+	     (match_operand:GPR 3 "register_operand" "")])
+	 (label_ref (match_operand 0 "" ""))
+	 (pc)))
+   (set (match_operand:GPR 4 "nonimmediate_operand" "")
+        (plus:GPR (match_dup 1) (match_dup 2)))
+   (clobber (match_scratch:GPR 5 ""))]
+  "TARGET_CPU_ZARCH"
+  "#"
+  "!reload_completed && !reload_in_progress"
+  [(set (match_dup 7) (match_dup 2)) ; the increment
+   (set (match_dup 8) (match_dup 3)) ; the comparison value
+   (parallel [(set (pc)
+		   (if_then_else
+		    (match_op_dup 6
+		       [(plus:GPR (match_dup 1) (match_dup 7))
+			(match_dup 8)])
+		    (label_ref (match_dup 0))
+		    (pc)))
+	      (set (match_dup 4)
+		   (plus:GPR (match_dup 1) (match_dup 7)))
+	      (clobber (match_dup 5))
+	      (clobber (reg:CC CC_REGNUM))])]
+  {
+    rtx dreg = gen_reg_rtx (word_mode == DImode ? TImode : DImode);
+    operands[7] = gen_lowpart (<GPR:MODE>mode,
+			       gen_highpart (word_mode, dreg));
+    operands[8] = gen_lowpart (<GPR:MODE>mode,
+			       gen_lowpart (word_mode, dreg));
+  })
+
+; brxlg, brxhg
+
+(define_insn_and_split "*brxg_64bit"
+  [(set (pc)
+        (if_then_else
+          (match_operator 5 "s390_brx_operator"
+	     [(plus:DI (match_operand:DI 1 "register_operand" "d,d,d")
+		       (subreg:DI (match_operand:TI 2 "register_operand" "d,d,d") 0))
+              (subreg:DI (match_dup 2) 8)])
+          (label_ref (match_operand 0 "" ""))
+          (pc)))
+   (set (match_operand:DI 3 "nonimmediate_operand" "=1,?X,?X")
+        (plus:DI (match_dup 1)
+		 (subreg:DI (match_dup 2) 0)))
+   (clobber (match_scratch:DI 4 "=X,&1,&?d"))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_64BIT"
+{
+  if (which_alternative != 0)
+    return "#";
+  else if (get_attr_length (insn) == 6)
+    return "brx%E5g\t%1,%2,%l0";
+  else
+    return "agr\t%1,%2\;cgr\t%1,%M2\;jg%C5\t%l0";
+}
+  "&& reload_completed
+   && (!REG_P (operands[3])
+       || !rtx_equal_p (operands[1], operands[3]))"
+  [(set (match_dup 4) (match_dup 1))
+   (parallel [(set (match_dup 4) (plus:DI (match_dup 4) (subreg:DI (match_dup 2) 0)))
+	      (clobber (reg:CC CC_REGNUM))])
+   (set (reg:CCS CC_REGNUM) (compare:CCS (match_dup 4) (subreg:DI (match_dup 2) 8)))
+   (set (match_dup 3) (match_dup 4))
+   (set (pc) (if_then_else (match_op_dup 5 [(reg:CCS CC_REGNUM) (const_int 0)])
+			   (label_ref (match_dup 0))
+			   (pc)))]
+  ""
+  [(set_attr "op_type"  "RIE")
+   (set_attr "type"  "branch")
+   (set (attr "length")
+        (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+                      (const_int 6) (const_int 16)))])
+
+; brxle, brxh
+
+(define_insn_and_split "*brx_64bit"
+  [(set (pc)
+        (if_then_else
+          (match_operator 5 "s390_brx_operator"
+	     [(plus:SI (match_operand:SI 1 "register_operand" "d,d,d")
+		       (subreg:SI (match_operand:TI 2 "register_operand" "d,d,d") 4))
+              (subreg:SI (match_dup 2) 12)])
+          (label_ref (match_operand 0 "" ""))
+          (pc)))
+   (set (match_operand:SI 3 "nonimmediate_operand" "=1,?X,?X")
+        (plus:SI (match_dup 1)
+		 (subreg:SI (match_dup 2) 4)))
+   (clobber (match_scratch:SI 4 "=X,&1,&?d"))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_64BIT"
+{
+  if (which_alternative != 0)
+    return "#";
+  else if (get_attr_length (insn) == 6)
+    return "brx%C5\t%1,%2,%l0";
+  else
+    return "ar\t%1,%2\;cr\t%1,%M2\;jg%C5\t%l0";
+}
+  "&& reload_completed
+   && (!REG_P (operands[3])
+       || !rtx_equal_p (operands[1], operands[3]))"
+  [(set (match_dup 4) (match_dup 1))
+   (parallel [(set (match_dup 4) (plus:SI (match_dup 4) (subreg:SI (match_dup 2) 4)))
+	      (clobber (reg:CC CC_REGNUM))])
+   (set (reg:CCS CC_REGNUM) (compare:CCS (match_dup 4) (subreg:SI (match_dup 2) 12)))
+   (set (match_dup 3) (match_dup 4))
+   (set (pc) (if_then_else (match_op_dup 5 [(reg:CCS CC_REGNUM) (const_int 0)])
+			   (label_ref (match_dup 0))
+			   (pc)))]
+  ""
+  [(set_attr "op_type"  "RSI")
+   (set_attr "type"  "branch")
+   (set (attr "length")
+        (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+                      (const_int 6) (const_int 14)))])
+
+; brxle, brxh
+
+(define_insn_and_split "*brx_31bit"
+  [(set (pc)
+        (if_then_else
+          (match_operator 5 "s390_brx_operator"
+	    [(plus:SI (match_operand:SI 1 "register_operand" "d,d,d")
+		      (subreg:SI (match_operand:DI 2 "register_operand" "d,d,d") 0))
+	     (subreg:SI (match_dup 2) 4)])
+          (label_ref (match_operand 0 "" ""))
+          (pc)))
+   (set (match_operand:SI 3 "nonimmediate_operand" "=1,?X,?X")
+        (plus:SI (match_dup 1)
+		 (subreg:SI (match_dup 2) 0)))
+   (clobber (match_scratch:SI 4 "=X,&1,&?d"))
+   (clobber (reg:CC CC_REGNUM))]
+  "!TARGET_64BIT && TARGET_CPU_ZARCH"
+{
+  if (which_alternative != 0)
+    return "#";
+  else if (get_attr_length (insn) == 6)
+    return "brx%C5\t%1,%2,%l0";
+  else
+    return "ar\t%1,%2\;cr\t%1,%M2\;jg%C5\t%l0";
+}
+  "&& reload_completed
+   && (!REG_P (operands[3])
+       || !rtx_equal_p (operands[1], operands[3]))"
+  [(set (match_dup 4) (match_dup 1))
+   (parallel [(set (match_dup 4) (plus:SI (match_dup 4) (subreg:SI (match_dup 2) 0)))
+	      (clobber (reg:CC CC_REGNUM))])
+   (set (reg:CCS CC_REGNUM) (compare:CCS (match_dup 4) (subreg:SI (match_dup 2) 4)))
+   (set (match_dup 3) (match_dup 4))
+   (set (pc) (if_then_else (match_op_dup 5 [(reg:CCS CC_REGNUM) (const_int 0)])
+			   (label_ref (match_dup 0))
+			   (pc)))]
+  ""
+  [(set_attr "op_type"  "RSI")
+   (set_attr "type"  "branch")
+   (set (attr "length")
+        (if_then_else (lt (abs (minus (pc) (match_dup 0))) (const_int 60000))
+                      (const_int 6) (const_int 14)))])
+
+
+;; branch on count
+
 (define_expand "doloop_end"
   [(use (match_operand 0 "" ""))        ; loop pseudo
    (use (match_operand 1 "" ""))        ; iterations; zero if unknown
Index: gcc/gcc/config/s390/predicates.md
===================================================================
--- gcc.orig/gcc/config/s390/predicates.md
+++ gcc/gcc/config/s390/predicates.md
@@ -197,6 +197,9 @@
 (define_predicate "s390_scond_operator"
   (match_code "ltu, gtu, leu, geu"))
 
+(define_predicate "s390_brx_operator"
+  (match_code "le, gt"))
+
 ;; Return nonzero if OP is a valid comparison operator
 ;; for an ALC condition.
 
Index: gcc/gcc/config/s390/s390.c
===================================================================
--- gcc.orig/gcc/config/s390/s390.c
+++ gcc/gcc/config/s390/s390.c
@@ -4946,6 +4946,7 @@ print_operand_address (FILE *file, rtx a
 
     'C': print opcode suffix for branch condition.
     'D': print opcode suffix for inverse branch condition.
+    'E': print opcode suffix for branch on index instruction.
     'J': print tls_load/tls_gdcall/tls_ldcall suffix
     'G': print the size of the operand in bytes.
     'O': print only the displacement of a memory reference.
@@ -4978,6 +4979,15 @@ print_operand (FILE *file, rtx x, int co
       fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
       return;
 
+    case 'E':
+      if (GET_CODE (x) == LE)
+	fprintf (file, "l");
+      else if (GET_CODE (x) == GT)
+	fprintf (file, "h");
+      else
+	gcc_unreachable ();
+      return;
+
     case 'J':
       if (GET_CODE (x) == SYMBOL_REF)
 	{



More information about the Gcc-patches mailing list