[committed] Revamp casesi and jump table handling on PA

John David Anglin dave@hiauly1.hia.nrc.ca
Wed Oct 29 22:18:00 GMT 2003


It can to my attention that there was a problem with jump tables and
the "-mbig-switch" option on the PA.  The T' fixup was being used to
load the address of the branch target in each jump table element.
This was supposed to work for large, possibly computer generated,
case vectors.  However, the insn used with the T' fixup only supports
a 14 bit displacement.  Thus, it couldn't handle the large offsets
from the pic register that could occur with large branch tables.

I tried the initial obvious fix to revamp the patterns to use a
two insn sequence to load the target address using the LT' and RT'
fixups.  This worked but then it occcured to me that it might be better
to use relative offsets in the table for big tables when generating
pic code, and just the absolute address for non-pic code.  This
patch implements that approach.

The first benefit is big switch tables are now one quarter the
previous size.  The second benefit is the insn sequences are shorter
in length for the non-pic case, equal (PA 2.0) or one insn longer for
the pic case (PA 1.x and 64-bit code).  Thirdly, we have one less
branch and the same number of memory accesses.  Finally, with the
jump table in the text section, there are no relocations needed for
the pic case, so this should help assembler and linker performance.

The patch in various combinations and permutations has been tested
on hppa64-hp-hpux11.11, hppa2.0w-hp-hpux11.11 and hppa-unknown-linux-gnu.
Committed to the trunk.

Dave
-- 
J. David Anglin                                  dave.anglin@nrc-cnrc.gc.ca
National Research Council of Canada              (613) 990-0752 (FAX: 952-6602)

2003-10-29  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>

	* pa-linux.h (ASM_OUTPUT_ADDR_VEC_ELT): Use label in big switch ELTs.
	(ASM_OUTPUT_ADDR_DIFF_ELT): Use label difference in big switch ELTs.
	* pa.c (pa_adjust_insn_length): Check for btable branches using
	attribute TYPE_BTABLE_BRANCH.
	(pa_reorg): Simplify.
	* pa.h (CASE_VECTOR_MODE): Change big switch mode to SImode.
	(ASM_OUTPUT_ADDR_VEC_ELT): As above.
	(ASM_OUTPUT_ADDR_DIFF_ELT): As above.
	* pa.md (btable_branch): New instruction type.
	(in_branch_delay, in_nullified_branch_delay, in_call_delay): Disallow
	btable branches.
	(define_delay): Add btable branches to insn types that may have an
	insn in the delay position.
	(Z2, Z3): Add btable branch to list.
	Simplify unamed pattern set copy pic_label_operand to register.  Add
	PA 2.0 variant.
	(short_jump): New jump for use in branch tables.
	(casesi, casesi0): Revise for new branch table formats.
	(casesi32, casesi32p, casesi64p): New casesi patterns.
	(indirect_jump): Move.

Index: config/pa/pa-linux.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/pa/pa-linux.h,v
retrieving revision 1.35
diff -u -3 -p -r1.35 pa-linux.h
--- config/pa/pa-linux.h	23 Aug 2003 01:32:54 -0000	1.35
+++ config/pa/pa-linux.h	29 Oct 2003 19:11:04 -0000
@@ -121,14 +121,14 @@ Boston, MA 02111-1307, USA.  */
 #undef ASM_OUTPUT_ADDR_VEC_ELT
 #define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
   if (TARGET_BIG_SWITCH)					\
-    fprintf (FILE, "\tstw %%r1,-16(%%r30)\n\tldil LR'.L%d,%%r1\n\tbe RR'.L%d(%%sr4,%%r1)\n\tldw -16(%%r30),%%r1\n", VALUE, VALUE);		\
+    fprintf (FILE, "\t.word .L%d\n", VALUE);			\
   else								\
     fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE)
 
 #undef ASM_OUTPUT_ADDR_DIFF_ELT
 #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
   if (TARGET_BIG_SWITCH)					\
-    fprintf (FILE, "\tstw %%r1,-16(%%r30)\n\tldw T'.L%d(%%r19),%%r1\n\tbv %%r0(%%r1)\n\tldw -16(%%r30),%%r1\n", VALUE);				\
+    fprintf (FILE, "\t.word .L%d-.L%d\n", VALUE, REL);		\
   else								\
     fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE)
 
Index: config/pa/pa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/pa/pa.c,v
retrieving revision 1.228
diff -u -3 -p -r1.228 pa.c
--- config/pa/pa.c	8 Oct 2003 17:55:41 -0000	1.228
+++ config/pa/pa.c	29 Oct 2003 19:11:07 -0000
@@ -4319,8 +4319,8 @@ pa_adjust_insn_length (rtx insn, int len
   /* Jumps inside switch tables which have unfilled delay slots need
      adjustment.  */
   if (GET_CODE (insn) == JUMP_INSN
-	   && simplejump_p (insn)
-	   && GET_MODE (insn) == SImode)
+      && GET_CODE (pat) == PARALLEL
+      && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
     return 4;
   /* Millicode insn with an unfilled delay slot.  */
   else if (GET_CODE (insn) == INSN
@@ -7848,7 +7848,8 @@ following_call (rtx insn)
    The jump instructions within the table are special; we must be able
    to identify them during assembly output (if the jumps don't get filled
    we need to emit a nop rather than nullifying the delay slot)).  We
-   identify jumps in switch tables by marking the SET with DImode.
+   identify jumps in switch tables by using insns with the attribute
+   type TYPE_BTABLE_BRANCH.
 
    We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
    insns.  This serves two purposes, first it prevents jump.c from
@@ -7874,7 +7875,7 @@ pa_reorg (void)
       /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns.  */
       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
 	{
-	  rtx pattern, tmp, location;
+	  rtx pattern, tmp, location, label;
 	  unsigned int length, i;
 
 	  /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode.  */
@@ -7900,57 +7901,17 @@ pa_reorg (void)
 	      location = NEXT_INSN (location);
 
 	      if (GET_CODE (pattern) == ADDR_VEC)
-		{
-		  /* Emit the jump itself.  */
-		  tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0));
-		  tmp = emit_jump_insn_after (tmp, location);
-		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0);
-		  /* It is easy to rely on the branch table markers
-		     during assembly output to trigger the correct code
-		     for a switch table jump with an unfilled delay slot,
-
-		     However, that requires state and assumes that we look
-		     at insns in order.
-
-		     We can't make such assumptions when computing the length
-		     of instructions.  Ugh.  We could walk the insn chain to
-		     determine if this instruction is in a branch table, but
-		     that can get rather expensive, particularly during the
-		     branch shortening phase of the compiler.
-
-		     So instead we mark this jump as being special.  This is
-		     far from ideal and knows that no code after this will
-		     muck around with the mode of the JUMP_INSN itself.  */
-		  PUT_MODE (tmp, SImode);
-		  LABEL_NUSES (JUMP_LABEL (tmp))++;
-		  location = NEXT_INSN (location);
-		}
+		label = XEXP (XVECEXP (pattern, 0, i), 0);
 	      else
-		{
-		  /* Emit the jump itself.  */
-		  tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0));
-		  tmp = emit_jump_insn_after (tmp, location);
-		  JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0);
-		  /* It is easy to rely on the branch table markers
-		     during assembly output to trigger the correct code
-		     for a switch table jump with an unfilled delay slot,
-
-		     However, that requires state and assumes that we look
-		     at insns in order.
-
-		     We can't make such assumptions when computing the length
-		     of instructions.  Ugh.  We could walk the insn chain to
-		     determine if this instruction is in a branch table, but
-		     that can get rather expensive, particularly during the
-		     branch shortening phase of the compiler.
-
-		     So instead we mark this jump as being special.  This is
-		     far from ideal and knows that no code after this will
-		     muck around with the mode of the JUMP_INSN itself.  */
-		  PUT_MODE (tmp, SImode);
-		  LABEL_NUSES (JUMP_LABEL (tmp))++;
-		  location = NEXT_INSN (location);
-		}
+		label = XEXP (XVECEXP (pattern, 1, i), 0);
+
+	      tmp = gen_short_jump (label);
+
+	      /* Emit the jump itself.  */
+	      tmp = emit_jump_insn_after (tmp, location);
+	      JUMP_LABEL (tmp) = label;
+	      LABEL_NUSES (label)++;
+	      location = NEXT_INSN (location);
 
 	      /* Emit a BARRIER after the jump.  */
 	      emit_barrier_after (location);
@@ -7968,7 +7929,12 @@ pa_reorg (void)
     }
   else
     {
-      /* Sill need an end_brtab insn.  */
+      /* Still need brtab marker insns.  FIXME: the presence of these
+	 markers disables output of the branch table to readonly memory,
+	 and any alignment directives that might be needed.  Possibly,
+	 the begin_brtab insn should be output before the label for the
+	 table.  This doesn matter at the moment since the tables are
+	 always output in the text section.  */
       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
 	{
 	  /* Find an ADDR_VEC insn.  */
Index: config/pa/pa.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/pa/pa.h,v
retrieving revision 1.203
diff -u -3 -p -r1.203 pa.h
--- config/pa/pa.h	23 Aug 2003 01:32:58 -0000	1.203
+++ config/pa/pa.h	29 Oct 2003 19:11:09 -0000
@@ -1550,11 +1550,14 @@ do { 									\
 
 #define FUNCTION_NAME_P(NAME)  (*(NAME) == '@')
 
-/* Specify the machine mode that this machine uses
-   for the index in the tablejump instruction.  */
-#define CASE_VECTOR_MODE (TARGET_BIG_SWITCH ? TImode : DImode)
+/* Specify the machine mode that this machine uses for the index in the
+   tablejump instruction.  For small tables, an element consists of a
+   ia-relative branch and its delay slot.  When -mbig-switch is specified,
+   we use a 32-bit absolute address for non-pic code, and a 32-bit offset
+   for both 32 and 64-bit pic code.  */
+#define CASE_VECTOR_MODE (TARGET_BIG_SWITCH ? SImode : DImode)
 
-/* Jump tables must be 32 bit aligned, no matter the size of the element.  */
+/* Jump tables must be 32-bit aligned, no matter the size of the element.  */
 #define ADDR_VEC_ALIGN(ADDR_VEC) 2
 
 /* Define this as 1 if `char' should by default be signed; else as 0.  */
@@ -1718,35 +1727,41 @@ do { 									\
 #define ASM_OUTPUT_ASCII(FILE, P, SIZE)  \
   output_ascii ((FILE), (P), (SIZE))
 
-/* This is how to output an element of a case-vector that is absolute.
-   Note that this method makes filling these branch delay slots
-   impossible.  */
-
-#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE)  \
-  if (TARGET_BIG_SWITCH)					\
-    fprintf (FILE, "\tstw %%r1,-16(%%r30)\n\tldil LR'L$%04d,%%r1\n\tbe RR'L$%04d(%%sr4,%%r1)\n\tldw -16(%%r30),%%r1\n", VALUE, VALUE);		\
-  else								\
-    fprintf (FILE, "\tb L$%04d\n\tnop\n", VALUE)
+/* Jump tables are always placed in the text section.  Technically, it
+   is possible to put them in the readonly data section when -mbig-switch
+   is specified.  This has the benefit of getting the table out of .text
+   and reducing branch lengths as a result.  The downside is that an
+   additional insn (addil) is needed to access the table when generating
+   PIC code.  The address difference table also has to use 32-bit
+   pc-relative relocations.  Currently, GAS does not support these
+   relocations, although it is easily modified to do this operation.
+   The table entries need to look like "$L1+(.+8-$L0)-$PIC_pcrel$0"
+   when using ELF GAS.  A simple difference can be used when using
+   SOM GAS or the HP assembler.  The final downside is GDB complains
+   about the nesting of the label for the table when debugging.  */
 
-/* Jump tables are executable code and live in the TEXT section on the PA.  */
 #define JUMP_TABLES_IN_TEXT_SECTION 1
 
-/* This is how to output an element of a case-vector that is relative.
-   This must be defined correctly as it is used when generating PIC code.
+/* This is how to output an element of a case-vector that is absolute.  */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE)  \
+  if (TARGET_BIG_SWITCH)						\
+    fprintf (FILE, "\t.word L$%04d\n", VALUE);				\
+  else									\
+    fprintf (FILE, "\tb L$%04d\n\tnop\n", VALUE)
 
-   I believe it safe to use the same definition as ASM_OUTPUT_ADDR_VEC_ELT
-   on the PA since ASM_OUTPUT_ADDR_VEC_ELT uses pc-relative jump instructions
-   rather than a table of absolute addresses.  */
+/* This is how to output an element of a case-vector that is relative. 
+   Since we always place jump tables in the text section, the difference
+   is absolute and requires no relocation.  */
 
 #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL)  \
-  if (TARGET_BIG_SWITCH)					\
-    fprintf (FILE, "\tstw %%r1,-16(%%r30)\n\tldw T'L$%04d(%%r19),%%r1\n\tbv %%r0(%%r1)\n\tldw -16(%%r30),%%r1\n", VALUE);				\
-  else								\
+  if (TARGET_BIG_SWITCH)						\
+    fprintf (FILE, "\t.word L$%04d-L$%04d\n", VALUE, REL);		\
+  else									\
     fprintf (FILE, "\tb L$%04d\n\tnop\n", VALUE)
 
-/* This is how to output an assembler line
-   that says to advance the location counter
-   to a multiple of 2**LOG bytes.  */
+/* This is how to output an assembler line that says to advance the
+   location counter to a multiple of 2**LOG bytes.  */
 
 #define ASM_OUTPUT_ALIGN(FILE,LOG)	\
     fprintf (FILE, "\t.align %d\n", (1<<(LOG)))
Index: config/pa/pa.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/pa/pa.md,v
retrieving revision 1.132
diff -u -3 -p -r1.132 pa.md
--- config/pa/pa.md	17 Oct 2003 03:32:21 -0000	1.132
+++ config/pa/pa.md	29 Oct 2003 19:11:13 -0000
@@ -32,7 +32,7 @@
 ;; type "binary" insns have two input operands (1,2) and one output (0)
 
 (define_attr "type"
-  "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,parallel_branch"
+  "move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,btable_branch,branch,cbranch,fbranch,call,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,parallel_branch"
   (const_string "binary"))
 
 (define_attr "pa_combine_type"
@@ -74,7 +74,7 @@
 
 ;; For conditional branches.
 (define_attr "in_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
+  (if_then_else (and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
 		     (eq_attr "length" "4"))
 		(const_string "true")
 		(const_string "false")))
@@ -82,7 +82,7 @@
 ;; Disallow instructions which use the FPU since they will tie up the FPU
 ;; even if the instruction is nullified.
 (define_attr "in_nullified_branch_delay" "false,true"
-  (if_then_else (and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch")
+  (if_then_else (and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,parallel_branch")
 		     (eq_attr "length" "4"))
 		(const_string "true")
 		(const_string "false")))
@@ -90,7 +90,7 @@
 ;; For calls and millicode calls.  Allow unconditional branches in the
 ;; delay slot.
 (define_attr "in_call_delay" "false,true"
-  (cond [(and (eq_attr "type" "!uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
+  (cond [(and (eq_attr "type" "!uncond_branch,btable_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
 	      (eq_attr "length" "4"))
 	   (const_string "true")
 	 (eq_attr "type" "uncond_branch")
@@ -110,7 +110,7 @@
   [(eq_attr "in_call_delay" "true") (nil) (nil)])
 
 ;; Return and other similar instructions.
-(define_delay (eq_attr "type" "branch,parallel_branch")
+(define_delay (eq_attr "type" "btable_branch,branch,parallel_branch")
   [(eq_attr "in_branch_delay" "true") (nil) (nil)])
 
 ;; Floating point conditional branch delay slot description and
@@ -505,7 +505,7 @@
 ;; to assume have zero latency.
 (define_insn_reservation "Z2" 0
   (and
-    (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl")
+    (eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,btable_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl")
     (eq_attr "cpu" "8000"))
   "inm_8000,rnm_8000")
 
@@ -513,7 +513,7 @@
 ;; retirement unit.
 (define_insn_reservation "Z3" 0
   (and
-    (eq_attr "type" "uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
+    (eq_attr "type" "uncond_branch,btable_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
     (eq_attr "cpu" "8000"))
   "inm0_8000+inm1_8000,rnm0_8000+rnm1_8000")
 
@@ -2453,9 +2473,9 @@
 ;; Note since this pattern can be created at reload time (via movsi), all
 ;; the same rules for movsi apply here.  (no new pseudos, no temporaries).
 (define_insn ""
-  [(set (match_operand 0 "pmode_register_operand" "=a")
+  [(set (match_operand 0 "pmode_register_operand" "=r")
 	(match_operand 1 "pic_label_operand" ""))]
-  ""
+  "TARGET_PA_20"
   "*
 {
   rtx xoperands[3];
@@ -2463,14 +2483,11 @@
 
   xoperands[0] = operands[0];
   xoperands[1] = operands[1];
-  if (TARGET_SOM || ! TARGET_GAS)
-    xoperands[2] = gen_label_rtx ();
+  xoperands[2] = gen_label_rtx ();
 
-  output_asm_insn (\"{bl|b,l} .+8,%0\", xoperands);
-  output_asm_insn (\"{depi|depwi} 0,31,2,%0\", xoperands);
-  if (TARGET_SOM || ! TARGET_GAS)
-    (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
-			       CODE_LABEL_NUMBER (xoperands[2]));
+  (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+				     CODE_LABEL_NUMBER (xoperands[2]));
+  output_asm_insn (\"mfia %0\", xoperands);
 
   /* If we're trying to load the address of a label that happens to be
      close, then we can use a shorter sequence.  */
@@ -2478,27 +2495,46 @@
       && INSN_ADDRESSES_SET_P ()
       && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0)))
 	        - INSN_ADDRESSES (INSN_UID (insn))) < 8100)
+    output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands);
+  else
     {
-      /* Prefixing with R% here is wrong, it extracts just 11 bits and is
-	 always non-negative.  */
-      if (TARGET_SOM || ! TARGET_GAS)
-	output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands);
-      else
-	output_asm_insn (\"ldo %1-$PIC_pcrel$0+8(%0),%0\", xoperands);
+      output_asm_insn (\"addil L%%%1-%2,%0\", xoperands);
+      output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands);
     }
+  return \"\";
+}"
+  [(set_attr "type" "multi")
+   (set_attr "length" "12")])		; 8 or 12
+
+(define_insn ""
+  [(set (match_operand 0 "pmode_register_operand" "=a")
+	(match_operand 1 "pic_label_operand" ""))]
+  "!TARGET_PA_20"
+  "*
+{
+  rtx xoperands[3];
+  extern FILE *asm_out_file;
+
+  xoperands[0] = operands[0];
+  xoperands[1] = operands[1];
+  xoperands[2] = gen_label_rtx ();
+
+  output_asm_insn (\"bl .+8,%0\", xoperands);
+  output_asm_insn (\"depi 0,31,2,%0\", xoperands);
+  (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+				     CODE_LABEL_NUMBER (xoperands[2]));
+
+  /* If we're trying to load the address of a label that happens to be
+     close, then we can use a shorter sequence.  */
+  if (GET_CODE (operands[1]) == LABEL_REF
+      && INSN_ADDRESSES_SET_P ()
+      && abs (INSN_ADDRESSES (INSN_UID (XEXP (operands[1], 0)))
+	        - INSN_ADDRESSES (INSN_UID (insn))) < 8100)
+    output_asm_insn (\"ldo %1-%2(%0),%0\", xoperands);
   else
     {
-      if (TARGET_SOM || ! TARGET_GAS)
-	{
-	  output_asm_insn (\"addil L%%%1-%2,%0\", xoperands);
-	  output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands);
-	}
-      else
-	{
-	  output_asm_insn (\"addil L%%%1-$PIC_pcrel$0+8,%0\", xoperands);
-	  output_asm_insn (\"ldo R%%%1-$PIC_pcrel$0+12(%0),%0\",
-	  		   xoperands);
-	}
+      output_asm_insn (\"addil L%%%1-%2,%0\", xoperands);
+      output_asm_insn (\"ldo R%%%1-%2(%0),%0\", xoperands);
     }
   return \"\";
 }"
@@ -5735,9 +5771,6 @@
   ""
   "*
 {
-  if (GET_MODE (insn) == SImode)
-    return \"b %l0%#\";
-
   /* An unconditional branch which can reach its target.  */
   if (get_attr_length (insn) != 24
       && get_attr_length (insn) != 16)
@@ -5761,6 +5794,24 @@
 			 (const_int 24))]
 	  (const_int 4)))])
 
+;;; Hope this is only within a function...
+(define_insn "indirect_jump"
+  [(set (pc) (match_operand 0 "register_operand" "r"))]
+  "GET_MODE (operands[0]) == word_mode"
+  "bv%* %%r0(%0)"
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")])
+
+;;; This jump is used in branch tables where the insn length is fixed.
+;;; The length of this insn is adjusted if the delay slot is not filled.
+(define_insn "short_jump"
+  [(set (pc) (label_ref (match_operand 0 "" "")))
+   (const_int 0)]
+  ""
+  "b%* %l0%#"
+  [(set_attr "type" "btable_branch")
+   (set_attr "length" "4")])
+
 ;; Subroutines of "casesi".
 ;; operand 0 is index
 ;; operand 1 is the minimum bound
@@ -5782,14 +5833,13 @@
 
   if (operands[1] != const0_rtx)
     {
-      rtx reg = gen_reg_rtx (SImode);
+      rtx index = gen_reg_rtx (SImode);
 
       operands[1] = GEN_INT (-INTVAL (operands[1]));
       if (!INT_14_BITS (operands[1]))
 	operands[1] = force_reg (SImode, operands[1]);
-      emit_insn (gen_addsi3 (reg, operands[0], operands[1]));
-
-      operands[0] = reg;
+      emit_insn (gen_addsi3 (index, operands[0], operands[1]));
+      operands[0] = index;
     }
 
   /* In 64bit mode we must make sure to wipe the upper bits of the register
@@ -5797,38 +5847,118 @@
      high part of the register.  */
   if (TARGET_64BIT)
     {
-      rtx reg = gen_reg_rtx (DImode);
-      emit_insn (gen_extendsidi2 (reg, operands[0]));
-      operands[0] = gen_rtx_SUBREG (SImode, reg, 4);
+      rtx index = gen_reg_rtx (DImode);
+
+      emit_insn (gen_extendsidi2 (index, operands[0]));
+      operands[0] = gen_rtx_SUBREG (SImode, index, 4);
     }
 
   if (!INT_5_BITS (operands[2]))
     operands[2] = force_reg (SImode, operands[2]);
 
+  /* This branch prevents us finding an insn for the delay slot of the
+     following vectored branch.  It might be possible to use the delay
+     slot if an index value of -1 was used to transfer to the out-of-range
+     label.  In order to do this, we would have to output the -1 vector
+     element after the delay insn.  The casesi output code would have to
+     check if the casesi insn is in a delay branch sequence and output
+     the delay insn if one is found.  If this was done, then it might
+     then be worthwhile to split the casesi patterns to improve scheduling.
+     However, it's not clear that all this extra complexity is worth
+     the effort.  */
   emit_insn (gen_cmpsi (operands[0], operands[2]));
   emit_jump_insn (gen_bgtu (operands[4]));
+
   if (TARGET_BIG_SWITCH)
     {
-      rtx temp = gen_reg_rtx (SImode);
-      emit_move_insn (temp, gen_rtx_PLUS (SImode, operands[0], operands[0]));
-      operands[0] = temp;
+      if (TARGET_64BIT)
+	{
+          rtx tmp1 = gen_reg_rtx (DImode);
+          rtx tmp2 = gen_reg_rtx (DImode);
+
+          emit_jump_insn (gen_casesi64p (operands[0], operands[3],
+                                         tmp1, tmp2));
+	}
+      else
+	{
+	  rtx tmp1 = gen_reg_rtx (SImode);
+
+	  if (flag_pic)
+	    {
+	      rtx tmp2 = gen_reg_rtx (SImode);
+
+	      emit_jump_insn (gen_casesi32p (operands[0], operands[3],
+					     tmp1, tmp2));
+	    }
+	  else
+	    emit_jump_insn (gen_casesi32 (operands[0], operands[3], tmp1));
+	}
     }
-  emit_jump_insn (gen_casesi0 (operands[0], operands[3]));
+  else
+    emit_jump_insn (gen_casesi0 (operands[0], operands[3]));
   DONE;
 }")
 
+;;; The rtl for this pattern doesn't accurately describe what the insn
+;;; actually does, particularly when case-vector elements are exploded
+;;; in pa_reorg.  However, the initial SET in these patterns must show
+;;; the connection of the insn to the following jump table.
 (define_insn "casesi0"
-  [(set (pc) (plus:SI
-	       (mem:SI (plus:SI (pc)
-				(match_operand:SI 0 "register_operand" "r")))
-	       (label_ref (match_operand 1 "" ""))))]
+  [(set (pc) (mem:SI (plus:SI
+		       (mult:SI (match_operand:SI 0 "register_operand" "r")
+				(const_int 4))
+		       (label_ref (match_operand 1 "" "")))))]
   ""
-  "blr %0,%%r0\;nop"
+  "blr,n %0,%%r0\;nop"
   [(set_attr "type" "multi")
    (set_attr "length" "8")])
 
-;; Need nops for the calls because execution is supposed to continue
-;; past; we don't want to nullify an instruction that we need.
+;;; 32-bit code, absolute branch table.
+(define_insn "casesi32"
+  [(set (pc) (mem:SI (plus:SI
+		       (mult:SI (match_operand:SI 0 "register_operand" "r")
+				(const_int 4))
+		       (label_ref (match_operand 1 "" "")))))
+   (clobber (match_operand:SI 2 "register_operand" "=&r"))]
+  "!TARGET_64BIT && TARGET_BIG_SWITCH"
+  "ldil L'%l1,%2\;ldo R'%l1(%2),%2\;{ldwx|ldw},s %0(%2),%2\;bv,n %%r0(%2)"
+  [(set_attr "type" "multi")
+   (set_attr "length" "16")])
+
+;;; 32-bit code, relative branch table.
+(define_insn "casesi32p"
+  [(set (pc) (mem:SI (plus:SI
+		       (mult:SI (match_operand:SI 0 "register_operand" "r")
+				(const_int 4))
+		       (label_ref (match_operand 1 "" "")))))
+   (clobber (match_operand:SI 2 "register_operand" "=&a"))
+   (clobber (match_operand:SI 3 "register_operand" "=&r"))]
+  "!TARGET_64BIT && TARGET_BIG_SWITCH"
+  "{bl .+8,%2\;depi 0,31,2,%2|mfia %2}\;ldo {16|20}(%2),%2\;\
+{ldwx|ldw},s %0(%2),%3\;{addl|add,l} %2,%3,%3\;bv,n %%r0(%3)"
+  [(set_attr "type" "multi")
+   (set (attr "length")
+     (if_then_else (ne (symbol_ref "TARGET_PA_20") (const_int 0))
+	(const_int 20)
+	(const_int 24)))])
+
+;;; 64-bit code, 32-bit relative branch table.
+(define_insn "casesi64p"
+  [(set (pc) (mem:DI (plus:DI
+		       (mult:DI (sign_extend:DI
+				  (match_operand:SI 0 "register_operand" "r"))
+				(const_int 8))
+		       (label_ref (match_operand 1 "" "")))))
+   (clobber (match_operand:DI 2 "register_operand" "=&r"))
+   (clobber (match_operand:DI 3 "register_operand" "=&r"))]
+  "TARGET_64BIT && TARGET_BIG_SWITCH"
+  "mfia %2\;ldo 24(%2),%2\;ldw,s %0(%2),%3\;extrd,s %3,63,32,%3\;\
+add,l %2,%3,%3\;bv,n %%r0(%3)"
+  [(set_attr "type" "multi")
+   (set_attr "length" "24")])
+
+
+;; Call patterns.
 ;;- jump to subroutine
 
 (define_expand "call"
@@ -7136,14 +7266,6 @@
   emit_barrier ();
   DONE;
 }")
-
-;;; Hope this is only within a function...
-(define_insn "indirect_jump"
-  [(set (pc) (match_operand 0 "register_operand" "r"))]
-  "GET_MODE (operands[0]) == word_mode"
-  "bv%* %%r0(%0)"
-  [(set_attr "type" "branch")
-   (set_attr "length" "4")])
 
 ;;; Operands 2 and 3 are assumed to be CONST_INTs.
 (define_expand "extzv"



More information about the Gcc-patches mailing list