ia64 branch register opts

Richard Henderson rth@twiddle.net
Fri Jul 25 15:47:00 GMT 2003


As discussed at OLS, wrt optimizing indirect function calls.
Will you try this and see if it provides enough improvement
to warrant cleaning up properly?


r~
-------------- next part --------------
Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.814
diff -u -p -r1.814 toplev.c
--- toplev.c	25 Jul 2003 09:52:26 -0000	1.814
+++ toplev.c	25 Jul 2003 15:38:34 -0000
@@ -260,6 +260,7 @@ enum dump_file_index
   DFI_cse,
   DFI_addressof,
   DFI_gcse,
+  DFI_pdl,
   DFI_loop,
   DFI_bypass,
   DFI_cfg,
@@ -313,6 +314,7 @@ static struct dump_file_info dump_file[D
   { "cse",	's', 0, 0, 0 },
   { "addressof", 'F', 0, 0, 0 },
   { "gcse",	'G', 1, 0, 0 },
+  { "pdl",	'Z', 1, 0, 0 },
   { "loop",	'L', 1, 0, 0 },
   { "bypass",   'G', 1, 0, 0 }, /* Yes, duplicate enable switch.  */
   { "cfg",	'f', 1, 0, 0 },
@@ -2907,6 +2909,11 @@ rest_of_handle_gcse (tree decl, rtx insn
     }
 
   close_dump_file (DFI_gcse, print_rtl_with_bb, insns);
+
+  open_dump_file (DFI_pdl, NULL);
+  ia64_place_descriptor_loads ();
+  close_dump_file (DFI_pdl, print_rtl_with_bb, insns);
+
   timevar_pop (TV_GCSE);
 
   ggc_collect ();
Index: config/ia64/ia64-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.59
diff -u -p -r1.59 ia64-protos.h
--- config/ia64/ia64-protos.h	19 Jun 2003 21:47:13 -0000	1.59
+++ config/ia64/ia64-protos.h	25 Jul 2003 15:38:34 -0000
@@ -83,7 +83,7 @@ extern rtx ia64_split_timode PARAMS((rtx
 extern rtx spill_tfmode_operand PARAMS((rtx, int));
 extern rtx ia64_expand_compare PARAMS((enum rtx_code, enum machine_mode));
 extern void ia64_expand_call PARAMS((rtx, rtx, rtx, int));
-extern void ia64_split_call PARAMS((rtx, rtx, rtx, rtx, rtx, int, int));
+extern void ia64_split_call PARAMS((rtx, rtx, rtx, rtx, rtx, rtx, int, int));
 extern void ia64_reload_gp PARAMS((void));
 
 extern HOST_WIDE_INT ia64_initial_elimination_offset PARAMS((int, int));
@@ -153,3 +153,5 @@ extern enum direction ia64_hpux_function
 #endif /* ARGS_SIZE_RTX */
 
 extern void ia64_hpux_handle_builtin_pragma PARAMS ((struct cpp_reader *));
+
+extern void ia64_place_descriptor_loads (void);
Index: config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.c,v
retrieving revision 1.237
diff -u -p -r1.237 ia64.c
--- config/ia64/ia64.c	17 Jul 2003 16:33:22 -0000	1.237
+++ config/ia64/ia64.c	25 Jul 2003 15:38:35 -0000
@@ -140,7 +140,7 @@ struct ia64_frame_info
 				   registers or long-term scratches.  */
   int n_spilled;		/* number of spilled registers.  */
   int reg_fp;			/* register for fp.  */
-  int reg_save_b0;		/* save register for b0.  */
+  int reg_save_b[8];		/* save registers for b0-b7.  */
   int reg_save_pr;		/* save register for prs.  */
   int reg_save_ar_pfs;		/* save register for ar.pfs.  */
   int reg_save_ar_unat;		/* save register for ar.unat.  */
@@ -262,6 +262,9 @@ static void ia64_hpux_add_extern_decl PA
 static void ia64_hpux_file_end PARAMS ((void))
      ATTRIBUTE_UNUSED;
 
+static int ia64_branch_target_register_class (void);
+static bool ia64_branch_target_register_callee_saved (bool);
+
 
 /* Table of valid machine attributes.  */
 static const struct attribute_spec ia64_attribute_table[] =
@@ -368,6 +371,13 @@ static const struct attribute_spec ia64_
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
 
+#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
+#define TARGET_BRANCH_TARGET_REGISTER_CLASS \
+  ia64_branch_target_register_class
+#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
+#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
+  ia64_branch_target_register_callee_saved
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
@@ -1543,55 +1553,58 @@ ia64_reload_gp ()
 }
 
 void
-ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
+ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b, gp,
 		 noreturn_p, sibcall_p)
-     rtx retval, addr, retaddr, scratch_r, scratch_b;
+     rtx retval, addr, gp, retaddr, scratch_r, scratch_b;
      int noreturn_p, sibcall_p;
 {
   rtx insn;
   bool is_desc = false;
 
-  /* If we find we're calling through a register, then we're actually
-     calling through a descriptor, so load up the values.  */
-  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
+  if (REG_P (addr))
     {
-      rtx tmp;
-      bool addr_dead_p;
-
-      /* ??? We are currently constrained to *not* use peep2, because
-	 we can legitimately change the global lifetime of the GP
-	 (in the form of killing where previously live).  This is 
-	 because a call through a descriptor doesn't use the previous
-	 value of the GP, while a direct call does, and we do not
-	 commit to either form until the split here.
-
-	 That said, this means that we lack precise life info for
-	 whether ADDR is dead after this call.  This is not terribly
-	 important, since we can fix things up essentially for free
-	 with the POST_DEC below, but it's nice to not use it when we
-	 can immediately tell it's not necessary.  */
-      addr_dead_p = ((noreturn_p || sibcall_p
-		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
-					    REGNO (addr)))
-		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
-
-      /* Load the code address into scratch_b.  */
-      tmp = gen_rtx_POST_INC (Pmode, addr);
-      tmp = gen_rtx_MEM (Pmode, tmp);
-      emit_move_insn (scratch_r, tmp);
-      emit_move_insn (scratch_b, scratch_r);
+      if (GR_REGNO_P (REGNO (addr)))
+	{
+	  rtx tmp;
+	  bool addr_dead_p;
 
-      /* Load the GP address.  If ADDR is not dead here, then we must
-	 revert the change made above via the POST_INCREMENT.  */
-      if (!addr_dead_p)
-	tmp = gen_rtx_POST_DEC (Pmode, addr);
+	  /* ??? We are currently constrained to *not* use peep2, because
+	     we can legitimately change the global lifetime of the GP
+	     (in the form of killing where previously live).  This is 
+	     because a call through a descriptor doesn't use the previous
+	     value of the GP, while a direct call does, and we do not
+	     commit to either form until the split here.
+
+	     That said, this means that we lack precise life info for
+	     whether ADDR is dead after this call.  This is not terribly
+	     important, since we can fix things up essentially for free
+	     with the POST_DEC below, but it's nice to not use it when we
+	     can immediately tell it's not necessary.  */
+	  addr_dead_p = ((noreturn_p || sibcall_p
+		          || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+					        REGNO (addr)))
+		         && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
+
+	  /* Load the code address into scratch_b.  */
+	  tmp = gen_rtx_POST_INC (Pmode, addr);
+	  tmp = gen_rtx_MEM (Pmode, tmp);
+	  emit_move_insn (scratch_r, tmp);
+	  emit_move_insn (scratch_b, scratch_r);
+
+	  /* Load the GP address.  If ADDR is not dead here, then we must
+	     revert the change made above via the POST_INCREMENT.  */
+	  if (!addr_dead_p)
+	    tmp = gen_rtx_POST_DEC (Pmode, addr);
+	  else
+	    tmp = addr;
+	  tmp = gen_rtx_MEM (Pmode, tmp);
+	  emit_move_insn (pic_offset_table_rtx, tmp);
+	  addr = scratch_b;
+	}
       else
-	tmp = addr;
-      tmp = gen_rtx_MEM (Pmode, tmp);
-      emit_move_insn (pic_offset_table_rtx, tmp);
+        emit_move_insn (pic_offset_table_rtx, gp);
 
       is_desc = true;
-      addr = scratch_b;
     }
 
   if (sibcall_p)
@@ -1842,14 +1855,6 @@ ia64_compute_frame_size (size)
 	spilled_gr_p = 1;
       }
 
-  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
-    if (regs_ever_live[regno] && ! call_used_regs[regno])
-      {
-	SET_HARD_REG_BIT (mask, regno);
-	spill_size += 8;
-	n_spilled += 1;
-      }
-
   /* Now come all special registers that might get saved in other
      general registers.  */
   
@@ -1874,8 +1879,8 @@ ia64_compute_frame_size (size)
 	 able to unwind the stack.  */
       SET_HARD_REG_BIT (mask, BR_REG (0));
 
-      current_frame_info.reg_save_b0 = find_gr_spill (1);
-      if (current_frame_info.reg_save_b0 == 0)
+      current_frame_info.reg_save_b[0] = find_gr_spill (1);
+      if (current_frame_info.reg_save_b[0] == 0)
 	{
 	  spill_size += 8;
 	  n_spilled += 1;
@@ -1929,10 +1934,10 @@ ia64_compute_frame_size (size)
      happen to be allocated hard regs, and are consecutive, rearrange them
      into the preferred order now.  */
   if (current_frame_info.reg_fp != 0
-      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
+      && current_frame_info.reg_save_b[0] == current_frame_info.reg_fp + 1
       && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
     {
-      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
+      current_frame_info.reg_save_b[0] = current_frame_info.reg_fp;
       current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
       current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
     }
@@ -1984,6 +1989,18 @@ ia64_compute_frame_size (size)
 	}
     }
 
+  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
+    if (regs_ever_live[regno] && ! call_used_regs[regno])
+      {
+	SET_HARD_REG_BIT (mask, regno);
+	current_frame_info.reg_save_b[regno - BR_REG (0)] = find_gr_spill (1);
+	if (current_frame_info.reg_save_b[regno - BR_REG (0)] == 0)
+	  {
+	    spill_size += 8;
+	    n_spilled += 1;
+	  }
+      }
+
   /* If we have an odd number of words of pretend arguments written to
      the stack, then the FR save area will be unaligned.  We round the
      size of this area up to keep things 16 byte aligned.  */
@@ -2416,8 +2433,8 @@ ia64_expand_prologue ()
      to care if an eliminable register isn't used, and "eliminates" it
      anyway.  */
   if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
-      && current_frame_info.reg_save_b0 != 0)
-    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
+      && current_frame_info.reg_save_b[0] != 0)
+    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b[0];
 
   /* We don't need an alloc instruction if we've used no outputs or locals.  */
   if (current_frame_info.n_local_regs == 0
@@ -2644,42 +2661,31 @@ ia64_expand_prologue ()
 	cfa_off -= 8;
       }
 
-  /* Handle BR0 specially -- it may be getting stored permanently in
-     some GR register.  */
-  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
-    {
-      reg = gen_rtx_REG (DImode, BR_REG (0));
-      if (current_frame_info.reg_save_b0 != 0)
-	{
-	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
-	  insn = emit_move_insn (alt_reg, reg);
-	  RTX_FRAME_RELATED_P (insn) = 1;
-
-	  /* Even if we're not going to generate an epilogue, we still
-	     need to save the register so that EH works.  */
-	  if (! epilogue_p)
-	    emit_insn (gen_prologue_use (alt_reg));
-	}
-      else
-	{
-	  alt_regno = next_scratch_gr_reg ();
-	  alt_reg = gen_rtx_REG (DImode, alt_regno);
-	  emit_move_insn (alt_reg, reg);
-	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
-	  cfa_off -= 8;
-	}
-    }
-
   /* Spill the rest of the BR registers.  */
-  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
+  for (regno = BR_REG (0); regno <= BR_REG (7); ++regno)
     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
       {
-	alt_regno = next_scratch_gr_reg ();
-	alt_reg = gen_rtx_REG (DImode, alt_regno);
 	reg = gen_rtx_REG (DImode, regno);
-	emit_move_insn (alt_reg, reg);
-	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
-	cfa_off -= 8;
+	i = regno - BR_REG (0);
+	if (current_frame_info.reg_save_b[i] != 0)
+	  {
+	    alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b[i]);
+	    insn = emit_move_insn (alt_reg, reg);
+	    RTX_FRAME_RELATED_P (insn) = 1;
+
+	    /* Even if we're not going to generate an epilogue, we still
+	       need to save the register so that EH works.  */
+	    if (! epilogue_p)
+	      emit_insn (gen_prologue_use (alt_reg));
+	  }
+	else
+	  {
+	    alt_regno = next_scratch_gr_reg ();
+	    alt_reg = gen_rtx_REG (DImode, alt_regno);
+	    emit_move_insn (alt_reg, reg);
+	    do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
+	    cfa_off -= 8;
+	  }
       }
 
   /* Align the frame and spill all FR registers.  */
@@ -2825,32 +2831,22 @@ ia64_expand_epilogue (sibcall_p)
 	cfa_off -= 8;
       }
   
-  /* Restore the branch registers.  Handle B0 specially, as it may
-     have gotten stored in some GR register.  */
-  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
-    {
-      if (current_frame_info.reg_save_b0 != 0)
-	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
-      else
-	{
-	  alt_regno = next_scratch_gr_reg ();
-	  alt_reg = gen_rtx_REG (DImode, alt_regno);
-	  do_restore (gen_movdi_x, alt_reg, cfa_off);
-	  cfa_off -= 8;
-	}
-      reg = gen_rtx_REG (DImode, BR_REG (0));
-      emit_move_insn (reg, alt_reg);
-    }
-    
-  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
+  /* Restore the branch registers.  */
+  for (regno = BR_REG (0); regno <= BR_REG (7); ++regno)
     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
       {
-	alt_regno = next_scratch_gr_reg ();
-	alt_reg = gen_rtx_REG (DImode, alt_regno);
-	do_restore (gen_movdi_x, alt_reg, cfa_off);
-	cfa_off -= 8;
-	reg = gen_rtx_REG (DImode, regno);
-	emit_move_insn (reg, alt_reg);
+	int i = regno - BR_REG (0);
+	if (current_frame_info.reg_save_b[i] != 0)
+	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b[i]);
+	else
+	  {
+	    alt_regno = next_scratch_gr_reg ();
+	    alt_reg = gen_rtx_REG (DImode, alt_regno);
+	    do_restore (gen_movdi_x, alt_reg, cfa_off);
+	    cfa_off -= 8;
+	  }
+        reg = gen_rtx_REG (DImode, regno);
+        emit_move_insn (reg, alt_reg);
       }
 
   /* Restore floating point registers.  */
@@ -2966,7 +2962,7 @@ ia64_direct_return ()
 
       return (current_frame_info.total_size == 0
 	      && current_frame_info.n_spilled == 0
-	      && current_frame_info.reg_save_b0 == 0
+	      && current_frame_info.reg_save_b[0] == 0
 	      && current_frame_info.reg_save_pr == 0
 	      && current_frame_info.reg_save_ar_pfs == 0
 	      && current_frame_info.reg_save_ar_unat == 0
@@ -2982,7 +2978,7 @@ ia64_hard_regno_rename_ok (from, to)
 {
   /* Don't clobber any of the registers we reserved for the prologue.  */
   if (to == current_frame_info.reg_fp
-      || to == current_frame_info.reg_save_b0
+      || to == current_frame_info.reg_save_b[0]
       || to == current_frame_info.reg_save_pr
       || to == current_frame_info.reg_save_ar_pfs
       || to == current_frame_info.reg_save_ar_unat
@@ -2990,7 +2986,7 @@ ia64_hard_regno_rename_ok (from, to)
     return 0;
 
   if (from == current_frame_info.reg_fp
-      || from == current_frame_info.reg_save_b0
+      || from == current_frame_info.reg_save_b[0]
       || from == current_frame_info.reg_save_pr
       || from == current_frame_info.reg_save_ar_pfs
       || from == current_frame_info.reg_save_ar_unat
@@ -3057,10 +3053,10 @@ ia64_output_function_prologue (file, siz
 
   mask = 0;
   grsave = grsave_prev = 0;
-  if (current_frame_info.reg_save_b0 != 0)
+  if (current_frame_info.reg_save_b[0] != 0)
     {
       mask |= 8;
-      grsave = grsave_prev = current_frame_info.reg_save_b0;
+      grsave = grsave_prev = current_frame_info.reg_save_b[0];
     }
   if (current_frame_info.reg_save_ar_pfs != 0
       && (grsave_prev == 0
@@ -7391,8 +7387,8 @@ ia64_eh_uses (regno)
   if (! reload_completed)
     return 0;
 
-  if (current_frame_info.reg_save_b0
-      && regno == current_frame_info.reg_save_b0)
+  if (current_frame_info.reg_save_b[0]
+      && regno == current_frame_info.reg_save_b[0])
     return 1;
   if (current_frame_info.reg_save_pr
       && regno == current_frame_info.reg_save_pr)
@@ -7540,13 +7536,28 @@ process_set (asm_out_file, pat)
       switch (src_regno)
 	{
 	case BR_REG (0):
-	  /* Saving return address pointer.  */
-	  if (dest_regno != current_frame_info.reg_save_b0)
+	  /* Saving the return pointer.  */
+	  if (dest_regno != current_frame_info.reg_save_b[0])
 	    abort ();
 	  fprintf (asm_out_file, "\t.save rp, r%d\n",
 		   ia64_dbx_register_number (dest_regno));
 	  return 1;
 
+	case BR_REG (1):
+	case BR_REG (2):
+	case BR_REG (3):
+	case BR_REG (4):
+	case BR_REG (5):
+	  /* Saving branch registers.  */
+	  if (dest_regno
+	      != current_frame_info.reg_save_b[src_regno - BR_REG (0)])
+	    abort ();
+	  fprintf (asm_out_file, "\t.save.b %d, r%d\n",
+		   1 << (src_regno - BR_REG (1)),
+		   ia64_dbx_register_number (dest_regno));
+	  return 1;
+
+
 	case PR_REG (0):
 	  if (dest_regno != current_frame_info.reg_save_pr)
 	    abort ();
@@ -7617,7 +7628,7 @@ process_set (asm_out_file, pat)
       switch (src_regno)
 	{
 	case BR_REG (0):
-	  if (current_frame_info.reg_save_b0 != 0)
+	  if (current_frame_info.reg_save_b[0] != 0)
 	    abort ();
 	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
 	  return 1;
@@ -8559,6 +8570,302 @@ ia64_output_mi_thunk (file, thunk, delta
   reload_completed = 0;
   epilogue_completed = 0;
   no_new_pseudos = 0;
+}
+
+struct collect_pdl_data
+{
+  sbitmap *transp, *avloc, *antloc, *kill;
+  int bb_index;
+};
+
+  /* An expression is transparent in a block if its operands are not modified
+   in the block.
+
+   An expression is computed (locally available) in a block if it is computed
+   at least once and expression would contain the same value if the
+   computation was moved to the end of the block.
+
+   An expression is locally anticipatable in a block if it is computed at
+   least once and expression would contain the same value if the computation
+   was moved to the beginning of the block.  */
+
+static void
+collect_pdl_data_regno (unsigned int regno, struct collect_pdl_data *d)
+{
+  RESET_BIT (d->transp[d->bb_index], regno);
+  RESET_BIT (d->avloc[d->bb_index], regno);
+  SET_BIT (d->kill[d->bb_index], regno);
+}
+
+static void
+collect_pdl_data_ns (rtx x, rtx set ATTRIBUTE_UNUSED, void *data)
+{
+  if (GET_CODE (x) == REG)
+    collect_pdl_data_regno (REGNO (x), data);
+}
+
+static void
+collect_pdl_data_call (rtx insn ATTRIBUTE_UNUSED, struct collect_pdl_data *d)
+{
+  bool clobbers_all = false;
+  unsigned int regno;
+
+#ifdef NON_SAVING_SETJMP
+  if (NON_SAVING_SETJMP && find_reg_note (insn, REG_SETJMP, NULL_RTX))
+    clobbers_all = true;
+#endif
+
+  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (clobbers_all || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
+      collect_pdl_data_regno (regno, d);
+}
+
+static unsigned int
+pdl_indirect_call (rtx insn)
+{
+  int code = INSN_CODE (insn);
+
+  if (code == CODE_FOR_call_gp
+      || code == CODE_FOR_call_value_gp
+      || code == CODE_FOR_sibcall_gp)
+    {
+      rtx addr;
+
+      addr = XVECEXP (PATTERN (insn), 0, 0);
+      if (GET_CODE (addr) == SET)
+	addr = SET_SRC (addr);
+      addr = XEXP (XEXP (addr, 0), 0);
+
+      if (REG_P (addr))
+	return REGNO (addr);
+    }
+
+  return INVALID_REGNUM;
+}
+
+#define TEMPS_PER_DESCRIPTOR_LOAD 3
+static rtx
+generate_descriptor_load (rtx src, varray_type replacements)
+{
+  unsigned int regno = REGNO (src);
+  rtx t1, t2, t3, tmp;
+
+  t1 = VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD);
+  if (t1 == NULL_RTX)
+    {
+      t1 = gen_reg_rtx (Pmode);
+      t2 = gen_reg_rtx (Pmode);
+      t3 = gen_reg_rtx (Pmode);
+      VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD + 0) = t1;
+      VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD + 1) = t2;
+      VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD + 2) = t3;
+    }
+  else
+    {
+      t2 = VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD + 1);
+      t3 = VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD + 2);
+    }
+
+  start_sequence ();
+  emit_move_insn (t3, src);
+
+  tmp = gen_rtx_MEM (Pmode, t3);
+  RTX_UNCHANGING_P (tmp) = 1;
+  emit_move_insn (t1, tmp);
+
+  emit_insn (gen_adddi3 (t3, t3, GEN_INT (UNITS_PER_WORD)));
+
+  tmp = gen_rtx_MEM (Pmode, t3);
+  RTX_UNCHANGING_P (tmp) = 1;
+  emit_move_insn (t2, tmp);
+
+  tmp = get_insns ();
+  end_sequence ();
+
+  return tmp;
+}
+
+static void
+munge_call_for_descriptor (rtx insn, varray_type replacements)
+{
+  rtx addr, t1, t2, tmp, pat;
+  unsigned int regno;
+
+  addr = XVECEXP (PATTERN (insn), 0, 0);
+  if (GET_CODE (addr) == SET)
+    addr = SET_SRC (addr);
+  addr = XEXP (XEXP (addr, 0), 0);
+
+  regno = REGNO (addr);
+
+  t1 = VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD);
+  t2 = VARRAY_RTX (replacements, regno * TEMPS_PER_DESCRIPTOR_LOAD + 1);
+  if (t1 == NULL)
+    abort ();
+
+  pat = PATTERN (insn);
+
+  tmp = XVECEXP (pat, 0, 0);
+  if (GET_CODE (tmp) == SET)
+    tmp = SET_SRC (tmp);
+  XEXP (XEXP (tmp, 0), 0) = t1;
+  XEXP (tmp, 1) = const2_rtx;
+
+  tmp = gen_rtx_USE (VOIDmode, t2);
+  XVECEXP (pat, 0, 2) = tmp;
+
+  PUT_NUM_ELEM (XVEC (pat, 0), 3);
+  INSN_CODE (insn) = -1;
+}
+
+void
+ia64_place_descriptor_loads (void)
+{
+  struct collect_pdl_data data;
+  sbitmap *insert, *delete;
+  rtx insn, end;
+  size_t regno, n_regs, e, n_edges;
+  basic_block bb;
+  struct edge_list *el;
+  varray_type replacements;
+  bool insertted;
+
+  n_regs = max_reg_num ();
+  data.transp = sbitmap_vector_alloc (last_basic_block, n_regs);
+  data.avloc = sbitmap_vector_alloc (last_basic_block, n_regs);
+  data.antloc = sbitmap_vector_alloc (last_basic_block, n_regs);
+  data.kill = sbitmap_vector_alloc (last_basic_block, n_regs);
+
+  sbitmap_vector_ones (data.transp, last_basic_block);
+  sbitmap_vector_zero (data.avloc, last_basic_block);
+  sbitmap_vector_zero (data.antloc, last_basic_block);
+  sbitmap_vector_zero (data.kill, last_basic_block);
+
+  /* Compute local properties.  We optimistically set TRANSP and AVLOC
+     and zap them as we find counter-examples.  */
+  FOR_EACH_BB (bb)
+    {
+      data.bb_index = bb->index;
+      end = NEXT_INSN (bb->end);
+      for (insn = bb->head; insn != end; insn = NEXT_INSN (insn))
+	{
+	  switch (GET_CODE (insn))
+	    {
+	    case CALL_INSN:
+	      regno = pdl_indirect_call (insn);
+	      if (regno != INVALID_REGNUM)
+		{
+		  if (!TEST_BIT (data.kill[bb->index], regno))
+		    SET_BIT (data.antloc[bb->index], regno);
+		  SET_BIT (data.avloc[bb->index], regno);
+		}
+
+	      collect_pdl_data_call (insn, &data);
+	      /* FALLTHRU */
+
+	    case INSN:
+	    case JUMP_INSN:
+	      note_stores (PATTERN (insn), collect_pdl_data_ns, &data);
+	      break;
+
+	    default:
+	      break;
+	    }
+        }
+    }
+
+  el = pre_edge_lcm (rtl_dump_file, n_regs, data.transp, data.avloc,
+		     data.antloc, data.kill, &insert, &delete);
+
+  VARRAY_RTX_INIT (replacements, TEMPS_PER_DESCRIPTOR_LOAD * n_regs,
+		   "replacements");
+
+  /* Place descriptor loads.  */
+  insertted = false;
+  for (e = 0, n_edges = NUM_EDGES (el); e < n_edges; e++)
+    {
+      EXECUTE_IF_SET_IN_SBITMAP(insert[e], 0, regno,
+	{
+	  edge eg = INDEX_EDGE (el, e);
+
+	  insn = generate_descriptor_load (regno_reg_rtx[regno], replacements);
+
+	  if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
+	    abort (); /* insert_insn_end_bb (insn, eg->src, 0); */
+	  else
+	    {
+	      if (rtl_dump_file)
+		fprintf (rtl_dump_file,
+			 "Hoisting descriptor for reg %lu to %d->%d\n",
+			 (unsigned long)regno, eg->src->index,
+			 eg->dest->index);
+	      insert_insn_on_edge (insn, eg);
+	      insertted = true;
+	    }
+	});
+    }
+
+  /* Munge call patterns to use said descriptor loads.  Add loads for
+     those call patterns whose descriptors weren't ANTLOC.  */
+
+  for (e = last_basic_block; e-- > 0; )
+    sbitmap_not (data.kill[e], delete[e]);
+
+  FOR_EACH_BB (bb)
+    {
+      data.bb_index = bb->index;
+      end = NEXT_INSN (bb->end);
+
+      for (insn = bb->head; insn != end; insn = NEXT_INSN (insn))
+	{
+	  switch (GET_CODE (insn))
+	    {
+	    case CALL_INSN:
+	      regno = pdl_indirect_call (insn);
+	      if (regno != INVALID_REGNUM)
+		{
+		  if (TEST_BIT (data.kill[bb->index], regno))
+		    {
+		      rtx new;
+		      new = generate_descriptor_load (regno_reg_rtx[regno],
+						      replacements);
+		      emit_insn_before (new, insn);
+		      RESET_BIT (data.kill[bb->index], regno);
+		    }
+
+		  munge_call_for_descriptor (insn, replacements);
+		}
+
+	      collect_pdl_data_call (insn, &data);
+	      /* FALLTHRU */
+
+	    case INSN:
+	    case JUMP_INSN:
+	      note_stores (PATTERN (insn), collect_pdl_data_ns, &data);
+	      break;
+
+	    default:
+	      break;
+	    }
+        }
+    }
+
+  if (insertted)
+    commit_edge_insertions ();
+}
+
+
+
+static int
+ia64_branch_target_register_class (void)
+{
+  return BR_REGS;
+}
+
+static bool
+ia64_branch_target_register_callee_saved (bool after_prologue_epilogue_gen)
+{
+  return !after_prologue_epilogue_gen;
 }
 
 #include "gt-ia64.h"
Index: config/ia64/ia64.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.md,v
retrieving revision 1.109
diff -u -p -r1.109 ia64.md
--- config/ia64/ia64.md	21 Jul 2003 17:52:21 -0000	1.109
+++ config/ia64/ia64.md	25 Jul 2003 15:38:35 -0000
@@ -4720,10 +4720,10 @@
   "#"
   [(set_attr "itanium_class" "br,scall")])
 
-;; Irritatingly, we don't have access to INSN within the split body.
-;; See commentary in ia64_split_call as to why these aren't peep2.
+;; ??? Irritating that INSN is not passed to gen_split_N, so we have
+;; to duplicate this pattern with and without the NORETURN check.
 (define_split
-  [(call (mem (match_operand 0 "call_operand" ""))
+  [(call (mem:DI (match_operand 0 "call_operand" ""))
 	 (const_int 1))
    (clobber (match_operand:DI 1 "register_operand" ""))
    (clobber (match_scratch:DI 2 ""))
@@ -4732,12 +4732,12 @@
   [(const_int 0)]
 {
   ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
-		   operands[3], true, false);
+		   operands[3], NULL_RTX, true, false);
   DONE;
 })
 
 (define_split
-  [(call (mem (match_operand 0 "call_operand" ""))
+  [(call (mem:DI (match_operand 0 "call_operand" ""))
 	 (const_int 1))
    (clobber (match_operand:DI 1 "register_operand" ""))
    (clobber (match_scratch:DI 2 ""))
@@ -4746,7 +4746,42 @@
   [(const_int 0)]
 {
   ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
-		   operands[3], false, false);
+		   operands[3], NULL_RTX, false, false);
+  DONE;
+})
+
+(define_insn "*call_gp_desc"
+  [(call (mem:DI (match_operand 0 "call_operand" "b"))
+	 (const_int 2))
+   (clobber (match_operand:DI 1 "register_operand" "=b"))
+   (use (match_operand:DI 2 "general_operand" "rm"))]
+  ""
+  "#"
+  [(set_attr "itanium_class" "br")])
+
+(define_split
+  [(call (mem:DI (match_operand 0 "call_operand" ""))
+	 (const_int 2))
+   (clobber (match_operand:DI 1 "register_operand" ""))
+   (use (match_operand:DI 2 "general_operand" ""))]
+  "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(const_int 0)]
+{
+  ia64_split_call (NULL_RTX, operands[0], operands[1],
+		   NULL_RTX, NULL_RTX, operands[2], true, false);
+  DONE;
+})
+
+(define_split
+  [(call (mem:DI (match_operand 0 "call_operand" ""))
+	 (const_int 2))
+   (clobber (match_operand:DI 1 "register_operand" ""))
+   (use (match_operand:DI 2 "general_operand" ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_call (NULL_RTX, operands[0], operands[1],
+		   NULL_RTX, NULL_RTX, operands[2], false, false);
   DONE;
 })
 
@@ -4771,8 +4806,8 @@
   "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
   [(const_int 0)]
 {
-  ia64_split_call (operands[0], operands[1], operands[2], operands[3],
-		   operands[4], true, false);
+  ia64_split_call (operands[0], operands[1], operands[2],
+		   operands[3], operands[4], NULL_RTX, true, false);
   DONE;
 })
 
@@ -4786,8 +4821,46 @@
   "reload_completed"
   [(const_int 0)]
 {
-  ia64_split_call (operands[0], operands[1], operands[2], operands[3],
-		   operands[4], false, false);
+  ia64_split_call (operands[0], operands[1], operands[2],
+		   operands[3], operands[4], NULL_RTX, false, false);
+  DONE;
+})
+
+(define_insn "*call_value_gp_desc"
+  [(set (match_operand 0 "" "")
+	(call (mem:DI (match_operand:DI 1 "call_operand" "b"))
+	      (const_int 2)))
+   (clobber (match_operand:DI 2 "register_operand" "=b"))
+   (use (match_operand:DI 3 "general_operand" "rm"))]
+  ""
+  "#"
+  [(set_attr "itanium_class" "br")])
+
+(define_split
+  [(set (match_operand 0 "" "")
+	(call (mem:DI (match_operand:DI 1 "call_operand" ""))
+	      (const_int 2)))
+   (clobber (match_operand:DI 2 "register_operand" ""))
+   (use (match_operand:DI 3 "general_operand" ""))]
+  "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(const_int 0)]
+{
+  ia64_split_call (operands[0], operands[1], operands[2],
+		   NULL_RTX, NULL_RTX, operands[3], true, false);
+  DONE;
+})
+
+(define_split
+  [(set (match_operand 0 "" "")
+	(call (mem:DI (match_operand:DI 1 "call_operand" ""))
+	      (const_int 2)))
+   (clobber (match_operand:DI 2 "register_operand" ""))
+   (use (match_operand:DI 3 "general_operand" ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_call (operands[0], operands[1], operands[2],
+		   NULL_RTX, NULL_RTX, operands[3], false, false);
   DONE;
 })
 
@@ -4802,6 +4875,22 @@
   [(const_int 0)]
 {
   ia64_split_call (NULL_RTX, operands[0], NULL_RTX, operands[1],
+		   operands[2], NULL_RTX, true, true);
+  DONE;
+}
+  [(set_attr "itanium_class" "br")])
+
+(define_insn_and_split "*sibcall_gp_desc"
+  [(call (mem:DI (match_operand:DI 0 "call_operand" "b"))
+	 (const_int 2))
+   (clobber (match_operand:DI 1 "register_operand" "=b"))
+   (use (match_operand:DI 2 "general_operand" "rm"))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_call (NULL_RTX, operands[0], operands[1], NULL_RTX, NULL_RTX,
 		   operands[2], true, true);
   DONE;
 }


More information about the Gcc-patches mailing list