This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
rfc: unwinder, ppc-spe dual sized registers, and a patch...
- From: Aldy Hernandez <aldyh at redhat dot com>
- To: gcc at gcc dot gnu dot org
- Cc: geoffk at geoffk dot org, dje at watson dot ibm dot com, rth at redhat dot com, jason at redhat dot com
- Date: Sun, 9 Mar 2003 19:04:09 -0800
- Subject: rfc: unwinder, ppc-spe dual sized registers, and a patch...
- Reply-to: aldyh at redhat dot com
Exception handling has been broken forever on the e500 because of the
duality of register sizes in the GPRs. GPRs are 64-bits, but the upper
half is only available to SIMD operations.
In the current unwinder infrastructure it's not possible to describe the
size of a register. The file unwind.h just assumes Unwind_Word is always
the size of a word. For the e500, this is 32-bits, though the GPRs
are 64-bits.
For the RedHat tree I had a kludge to the unwinder hardcoding the registers
to 64-bits, and as can be seen by the current frame code in the backend,
all saves are in V2SI mode. This is not only a hack, but it wastes space
on the frame. Ideally, as per the ABI, we should save registers in
64-bits only for the registers that have been used in 64 bits, otherwise
they should be saved in 32-bits.
The Right Thing is harder to do than expected, because scanning the
instruction chain for the mode in which a register is used is an unreliable
way of calculating the frame size. This is because the frame size is calculated at
register elimination time, and reload might come later and make additional
uses of registers in a mode we did not expect. As a compromise, I've decided
to scan the instruction chain at register elimination time, when we are
calculating the frame size, and to save ALL registers in 64-bits if there is
even ONE use of a register in 64-bits; otherwise the traditional 32-bit
saves/restores are done in the prologue/epilogue. This is not ideal, but it saves
us plenty of bytes in the frame for the average case (which doesn't use
SIMD operations).
Now, this still leaves the problem of the unwinder not knowing what size
it should save registers in. Jason suggested adding frame-related notes
consisting of a PARALLEL of two 32-bit saves. The first element of the PARALLEL
is the register, and the second element is a synthetic register, in this
case N+113, which is just past FIRST_PSEUDO_REGISTER. This works peachily,
and fixes all the EH failures.
One heads-up: the ABI says the register pairs should be N and
N+1200, but this creates a huge hole in the unwinder tables, thus defeating
the whole point, which was (well, besides fixing exception handling)
to save frame size. I've set the pairs to N and N+113, unless someone
has a huge objection. This creates a bit of confusion with the
debugging info, which is ABI-compliant and has DW_OP_pieces of N and
N+1200. I'm willing to live with that. Suggestions welcome.
There is one actual issue I'd like discussed. The change to
DWARF_FRAME_REGISTERS will increase the size of the unwinder tables,
but it will not break ABI compatibility (right, RTH?). I see no other
way around it. Any suggestions?
I'd like some feedback on the above paragraphs so I'm not committing
this just yet.
2003-03-09 Aldy Hernandez <aldyh at redhat dot com>
* config/rs6000/rs6000.h (DWARF_FRAME_REGISTERS): Define.
(rs6000_stack_t): Add spe_64bit_regs_used.
* config/rs6000/rs6000.c (rs6000_stack_info): Calculate
spe_64bit_regs_used, and use it to determine the size of the
frame.
(spe_func_has_64bit_regs_p): New.
(spe_synthesize_frame_save): New.
(rs6000_frame_related): Handle SPE synthetic registers.
(rs6000_emit_prologue): Only save in 64-bits if the function used
any registers in 64-bit mode.
(rs6000_emit_epilogue): Same, but for restore.
Index: config/rs6000/rs6000.h
===================================================================
RCS file: /cvs/uberbaum/gcc/config/rs6000/rs6000.h,v
retrieving revision 1.253
diff -c -p -r1.253 rs6000.h
*** config/rs6000/rs6000.h 25 Feb 2003 23:11:54 -0000 1.253
--- config/rs6000/rs6000.h 10 Mar 2003 02:27:39 -0000
*************** extern int rs6000_default_long_calls;
*** 704,709 ****
--- 704,713 ----
/* This must be included for pre gcc 3.0 glibc compatibility. */
#define PRE_GCC3_DWARF_FRAME_REGISTERS 77
+ /* Add 32 dwarf columns for synthetic SPE registers. The SPE
+ synthetic registers are 113 through 144 (GPR N maps to N + 113). */
+ #define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER + 32)
+
/* 1 for registers that have pervasive standard uses
and are not available for the register allocator.
*************** typedef struct rs6000_stack {
*** 1395,1400 ****
--- 1399,1405 ----
int spe_padding_size;
int toc_size; /* size to hold TOC if not in save_size */
int total_size; /* total bytes allocated for stack */
+ int spe_64bit_regs_used;
} rs6000_stack_t;
/* Define this if pushing a word on the stack
Index: config/rs6000/rs6000.c
===================================================================
RCS file: /cvs/uberbaum/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.430
diff -c -p -r1.430 rs6000.c
*** config/rs6000/rs6000.c 5 Mar 2003 22:37:52 -0000 1.430
--- config/rs6000/rs6000.c 10 Mar 2003 02:28:04 -0000
*************** static rtx rs6000_generate_compare PARAM
*** 175,180 ****
--- 175,182 ----
static void rs6000_maybe_dead PARAMS ((rtx));
static void rs6000_emit_stack_tie PARAMS ((void));
static void rs6000_frame_related PARAMS ((rtx, rtx, HOST_WIDE_INT, rtx, rtx));
+ static rtx spe_synthesize_frame_save PARAMS ((rtx));
+ static bool spe_func_has_64bit_regs_p ();
static void emit_frame_save PARAMS ((rtx, rtx, enum machine_mode,
unsigned int, int, int));
static rtx gen_frame_mem_offset PARAMS ((enum machine_mode, rtx, int));
*************** rs6000_stack_info ()
*** 9134,9143 ****
--- 9136,9154 ----
enum rs6000_abi abi;
int ehrd_size;
int total_raw_size;
+ static int insn_chain_scanned = -1;
/* Zero all fields portably. */
info = zero_info;
+ if (TARGET_SPE)
+ {
+ /* Cache value so we don't rescan instruction chain over and over. */
+ if (insn_chain_scanned < 0)
+ insn_chain_scanned = (int) spe_func_has_64bit_regs_p ();
+ info_ptr->spe_64bit_regs_used = insn_chain_scanned;
+ }
+
/* Select which calling sequence. */
info_ptr->abi = abi = DEFAULT_ABI;
*************** rs6000_stack_info ()
*** 9159,9170 ****
registers live (not the size they are used in), this proves
difficult because we'd have to traverse the instruction chain at
the right time, taking reload into account. This is a real pain,
! so we opt to save the GPRs in 64-bits always. Anyone overly
! concerned with frame size can fix this. ;-).
! So... since we save all GPRs (except the SP) in 64-bits, the
traditional GP save area will be empty. */
! if (TARGET_SPE_ABI)
info_ptr->gp_size = 0;
info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
--- 9170,9182 ----
registers live (not the size they are used in), this proves
difficult because we'd have to traverse the instruction chain at
the right time, taking reload into account. This is a real pain,
! so we opt to save the GPRs in 64-bits always if but one register
! gets used in 64-bits. Otherwise, all the registers in the frame
! get saved in 32-bits.
! So... since when we save all GPRs (except the SP) in 64-bits, the
traditional GP save area will be empty. */
! if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
info_ptr->gp_size = 0;
info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
*************** rs6000_stack_info ()
*** 9217,9223 ****
continue;
/* SPE saves EH registers in 64-bits. */
! ehrd_size = i * (TARGET_SPE_ABI ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
}
else
ehrd_size = 0;
--- 9229,9237 ----
continue;
/* SPE saves EH registers in 64-bits. */
! ehrd_size = i * (TARGET_SPE_ABI
! && info_ptr->spe_64bit_regs_used != 0
! ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
}
else
ehrd_size = 0;
*************** rs6000_stack_info ()
*** 9230,9236 ****
info_ptr->parm_size = RS6000_ALIGN (current_function_outgoing_args_size,
8);
! if (TARGET_SPE_ABI)
info_ptr->spe_gp_size = 8 * (32 - info_ptr->first_gp_reg_save);
else
info_ptr->spe_gp_size = 0;
--- 9244,9250 ----
info_ptr->parm_size = RS6000_ALIGN (current_function_outgoing_args_size,
8);
! if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
info_ptr->spe_gp_size = 8 * (32 - info_ptr->first_gp_reg_save);
else
info_ptr->spe_gp_size = 0;
*************** rs6000_stack_info ()
*** 9290,9296 ****
info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
! if (TARGET_SPE_ABI)
{
/* Align stack so SPE GPR save area is aligned on a
double-word boundary. */
--- 9304,9310 ----
info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
! if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
{
/* Align stack so SPE GPR save area is aligned on a
double-word boundary. */
*************** rs6000_stack_info ()
*** 9398,9404 ****
if (! TARGET_ALTIVEC_ABI || info_ptr->vrsave_mask == 0)
info_ptr->vrsave_save_offset = 0;
! if (! TARGET_SPE_ABI || info_ptr->spe_gp_size == 0)
info_ptr->spe_gp_save_offset = 0;
if (! info_ptr->lr_save_p)
--- 9412,9420 ----
if (! TARGET_ALTIVEC_ABI || info_ptr->vrsave_mask == 0)
info_ptr->vrsave_save_offset = 0;
! if (! TARGET_SPE_ABI
! || info_ptr->spe_64bit_regs_used == 0
! || info_ptr->spe_gp_size == 0)
info_ptr->spe_gp_save_offset = 0;
if (! info_ptr->lr_save_p)
*************** rs6000_stack_info ()
*** 9413,9418 ****
--- 9429,9467 ----
return info_ptr;
}
+ /* Return true if the current function uses any GPRs in 64-bit SIMD
+ mode.
+ The answer is true unconditionally when the function calls
+ eh_return or setjmp, or has a nonlocal goto. Otherwise the insn
+ chain is walked, beginning with the insn that follows the one
+ returned by get_insns (), and the answer is true as soon as an insn
+ is found whose pattern is a plain SET whose source mode satisfies
+ SPE_VECTOR_MODE. Patterns whose outermost code is anything other
+ than SET are not looked into. If nothing matches, return false.
+ NOTE(review): rs6000_stack_info keeps this result in a function-local
+ static (insn_chain_scanned) and nothing visible in this patch resets
+ it -- confirm the cached value cannot carry over from one function
+ to the next. */
+
+ static bool
+ spe_func_has_64bit_regs_p ()
+ {
+ rtx insns, insn;
+
+ /* Functions that save and restore all the call-saved registers will
+ need to save/restore the registers in 64-bits. These are reported
+ as 64-bit users without looking at the insn chain at all. */
+ if (current_function_calls_eh_return
+ || current_function_calls_setjmp
+ || current_function_has_nonlocal_goto)
+ return true;
+
+ insns = get_insns ();
+
+ for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ rtx i;
+
+ i = PATTERN (insn);
+ if (GET_CODE (i) == SET
+ && SPE_VECTOR_MODE (GET_MODE (SET_SRC (i))))
+ return true;
+ }
+ }
+
+ return false;
+ }
+
void
debug_stack_info (info)
rs6000_stack_t *info;
*************** rs6000_frame_related (insn, reg, val, re
*** 10159,10171 ****
}
else
abort ();
!
RTX_FRAME_RELATED_P (insn) = 1;
REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
real,
REG_NOTES (insn));
}
/* Returns an insn that has a vrsave set operation with the
appropriate CLOBBERs. */
--- 10208,10288 ----
}
else
abort ();
!
! if (TARGET_SPE)
! real = spe_synthesize_frame_save (real);
!
RTX_FRAME_RELATED_P (insn) = 1;
REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
real,
REG_NOTES (insn));
}
+ /* Given an SPE frame note, return a PARALLEL of SETs with the
+ original note, plus a synthetic register save.
+ REAL is the frame-related expression. Unless it is a SET whose
+ source has mode V2SImode it is returned unchanged. Otherwise the
+ result is a two-element PARALLEL of SImode stores built from a
+ copy_rtx of REAL: one store of register N and one store of
+ synthetic register N+113, 4 bytes apart, both marked
+ RTX_FRAME_RELATED_P. The element order and which store gets the
+ +4 offset depend on BYTES_BIG_ENDIAN (see below). */
+
+ static rtx
+ spe_synthesize_frame_save (real)
+ rtx real;
+ {
+ rtx synth, offset, reg, real2;
+
+ if (GET_CODE (real) != SET
+ || GET_MODE (SET_SRC (real)) != V2SImode)
+ return real;
+
+ /* For the SPE, registers saved in 64-bits, get a PARALLEL for their
+ frame related note. The parallel contains a set of the register
+ being saved, and another set to a synthetic register (n+113).
+ This is so we can differentiate between 64-bit and 32-bit saves.
+ Words cannot describe this nastiness.
+ The check below aborts unless the note is a store of a REG to a
+ MEM whose address is a PLUS.
+ NOTE(review): operand 1 of that PLUS is later read with INTVAL
+ without a CONST_INT check -- confirm callers always pass a
+ constant offset. */
+
+ if (GET_CODE (SET_DEST (real)) != MEM
+ || GET_CODE (XEXP (SET_DEST (real), 0)) != PLUS
+ || GET_CODE (SET_SRC (real)) != REG)
+ abort ();
+
+ /* Transform:
+ (set (mem (plus (reg x) (const y)))
+ (reg z))
+ into a pair of SImode sets. With !BYTES_BIG_ENDIAN:
+ (set (mem (plus (reg x) (const y)))
+ (reg z))
+ (set (mem (plus (reg x) (const y+4)))
+ (reg z+113))
+ With BYTES_BIG_ENDIAN the offsets are swapped: (reg z) is stored
+ at y+4 and (reg z+113) at y, and the synthetic set comes first in
+ the PARALLEL. SYNTH is copied from REAL2 before REAL2's offset
+ is adjusted, so it starts from the original offset y.
+ */
+
+ real2 = copy_rtx (real);
+ PUT_MODE (SET_DEST (real2), SImode);
+ reg = SET_SRC (real2);
+ real2 = replace_rtx (real2, reg, gen_rtx_REG (SImode, REGNO (reg)));
+ synth = copy_rtx (real2);
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ offset = XEXP (XEXP (SET_DEST (real2), 0), 1);
+ real2 = replace_rtx (real2, offset, GEN_INT (INTVAL (offset) + 4));
+ }
+
+ reg = SET_SRC (synth);
+ /* FIXME: the ABI says REGNO+1200, but this creates a huge hole
+ in the unwinder tables. I'm still unsure what to do.
+ For now the synthetic register number is REGNO + 113. */
+ synth = replace_rtx (synth, reg,
+ gen_rtx_REG (SImode, REGNO (reg) + 113));
+
+ offset = XEXP (XEXP (SET_DEST (synth), 0), 1);
+ synth = replace_rtx (synth, offset,
+ GEN_INT (INTVAL (offset)
+ + (BYTES_BIG_ENDIAN ? 0 : 4)));
+
+ RTX_FRAME_RELATED_P (synth) = 1;
+ RTX_FRAME_RELATED_P (real2) = 1;
+ if (BYTES_BIG_ENDIAN)
+ real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, synth, real2));
+ else
+ real = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, real2, synth));
+
+ return real;
+ }
+
/* Returns an insn that has a vrsave set operation with the
appropriate CLOBBERs. */
*************** rs6000_emit_prologue ()
*** 10317,10330 ****
int using_store_multiple;
HOST_WIDE_INT sp_offset = 0;
! if (TARGET_SPE_ABI)
{
reg_mode = V2SImode;
reg_size = 8;
}
using_store_multiple = (TARGET_MULTIPLE && ! TARGET_POWERPC64
! && !TARGET_SPE_ABI
&& info->first_gp_reg_save < 31);
saving_FPRs_inline = (info->first_fp_reg_save == 64
|| FP_SAVE_INLINE (info->first_fp_reg_save));
--- 10434,10448 ----
int using_store_multiple;
HOST_WIDE_INT sp_offset = 0;
! if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
{
reg_mode = V2SImode;
reg_size = 8;
}
using_store_multiple = (TARGET_MULTIPLE && ! TARGET_POWERPC64
! && (!TARGET_SPE_ABI
! || info->spe_64bit_regs_used == 0)
&& info->first_gp_reg_save < 31);
saving_FPRs_inline = (info->first_fp_reg_save == 64
|| FP_SAVE_INLINE (info->first_fp_reg_save));
*************** rs6000_emit_prologue ()
*** 10510,10516 ****
rtx addr, reg, mem;
reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
! if (TARGET_SPE_ABI)
{
int offset = info->spe_gp_save_offset + sp_offset + 8 * i;
rtx b;
--- 10628,10634 ----
rtx addr, reg, mem;
reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
! if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
{
int offset = info->spe_gp_save_offset + sp_offset + 8 * i;
rtx b;
*************** rs6000_emit_epilogue (sibcall)
*** 10752,10766 ****
int reg_size = TARGET_POWERPC64 ? 8 : 4;
int i;
! if (TARGET_SPE_ABI)
{
reg_mode = V2SImode;
reg_size = 8;
}
- info = rs6000_stack_info ();
using_load_multiple = (TARGET_MULTIPLE && ! TARGET_POWERPC64
! && !TARGET_SPE_ABI
&& info->first_gp_reg_save < 31);
restoring_FPRs_inline = (sibcall
|| current_function_calls_eh_return
--- 10870,10886 ----
int reg_size = TARGET_POWERPC64 ? 8 : 4;
int i;
! info = rs6000_stack_info ();
!
! if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
{
reg_mode = V2SImode;
reg_size = 8;
}
using_load_multiple = (TARGET_MULTIPLE && ! TARGET_POWERPC64
! && (!TARGET_SPE_ABI
! || info->spe_64bit_regs_used == 0)
&& info->first_gp_reg_save < 31);
restoring_FPRs_inline = (sibcall
|| current_function_calls_eh_return
*************** rs6000_emit_epilogue (sibcall)
*** 10930,10936 ****
rtx mem = gen_rtx_MEM (reg_mode, addr);
/* Restore 64-bit quantities for SPE. */
! if (TARGET_SPE_ABI)
{
int offset = info->spe_gp_save_offset + sp_offset + 8 * i;
rtx b;
--- 11050,11056 ----
rtx mem = gen_rtx_MEM (reg_mode, addr);
/* Restore 64-bit quantities for SPE. */
! if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
{
int offset = info->spe_gp_save_offset + sp_offset + 8 * i;
rtx b;