[arm] Fix for VFP errata
Paul Brook
paul@codesourcery.com
Thu Mar 25 00:13:00 GMT 2004
The following patch works around the ARM10 VFPr1 store-multiple erratum.
Ok?
Paul
2004-03-25 Paul Brook <paul@codesourcery.com>
* arm.c (vfp_print_multi): Remove.
(arm_output_fldmx): New function.
(vfp_emit_fstmx): Return block size, not insn. Add ARM10 VFPr1 bugfix.
(arm_expand_prologue): Update to match.
(arm_get_vfp_saved_size): New function.
(arm_get_frame_offsets): Use it.
(arm_output_epilogue): Use new functions.
Index: arm.c
===================================================================
RCS file: /var/cvsroot/gcc-cvs/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.340
diff -c -p -r1.340 arm.c
*** a/arm.c 24 Mar 2004 17:20:13 -0000 1.340
--- b/arm.c 25 Mar 2004 00:09:35 -0000
*************** print_multi_reg (FILE *stream, const cha
*** 7809,7837 ****
}
! /* Output the operands of a FLDM/FSTM instruction to STREAM.
! REG is the base register,
! INSTR is the possibly suffixed load or store instruction.
! FMT specifies now to print the register name.
! START and COUNT specify the register range. */
static void
! vfp_print_multi (FILE *stream, const char *instr, int reg,
! const char * fmt, int start, int count)
{
int i;
fputc ('\t', stream);
! asm_fprintf (stream, instr, reg);
! fputs (", {", stream);
! for (i = start; i < start + count; i++)
{
! if (i > start)
fputs (", ", stream);
! asm_fprintf (stream, fmt, i);
}
fputs ("}\n", stream);
}
--- 7809,7843 ----
}
! /* Output a FLDMX instruction to STREAM.
! BASE is the register containing the address.
! REG and COUNT specify the register range.
! Extra registers may be added to avoid hardware bugs. */
static void
! arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
{
int i;
+ /* Workaround ARM10 VFPr1 bug. */
+ if (count == 2 && !arm_arch6)
+ {
+ if (reg == 15)
+ reg--;
+ count++;
+ }
+
fputc ('\t', stream);
! asm_fprintf (stream, "fldmfdx\t%r!, {", base);
! for (i = reg; i < reg + count; i++)
{
! if (i > reg)
fputs (", ", stream);
! asm_fprintf (stream, "d%d", i);
}
fputs ("}\n", stream);
+
}
*************** vfp_output_fstmx (rtx * operands)
*** 7863,7871 ****
}
! /* Emit RTL to save block of VFP register pairs to the stack. */
! static rtx
vfp_emit_fstmx (int base_reg, int count)
{
rtx par;
--- 7869,7878 ----
}
! /* Emit RTL to save block of VFP register pairs to the stack. Returns the
! number of bytes pushed. */
! static int
vfp_emit_fstmx (int base_reg, int count)
{
rtx par;
*************** vfp_emit_fstmx (int base_reg, int count)
*** 7873,7878 ****
--- 7880,7895 ----
rtx tmp, reg;
int i;
+ /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
+ register pairs are stored by a store multiple insn. We avoid this
+ by pushing an extra pair. */
+ if (count == 2 && !arm_arch6)
+ {
+ if (base_reg == LAST_VFP_REGNUM - 3)
+ base_reg -= 2;
+ count++;
+ }
+
/* ??? The frame layout is implementation defined. We describe
standard format 1 (equivalent to a FSTMD insn and unused pad word).
We really need some way of representing the whole block so that the
*************** vfp_emit_fstmx (int base_reg, int count)
*** 7922,7928 ****
par = emit_insn (par);
REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
REG_NOTES (par));
! return par;
}
--- 7939,7947 ----
par = emit_insn (par);
REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
REG_NOTES (par));
! RTX_FRAME_RELATED_P (par) = 1;
!
! return count * 8 + 4;
}
*************** arm_compute_save_reg_mask (void)
*** 8864,8869 ****
--- 8883,8932 ----
return save_reg_mask;
}
+
+ /* Return the number of bytes required to save VFP registers. */
+ static int
+ arm_get_vfp_saved_size (void)
+ {
+ unsigned int regno;
+ int count;
+ int saved;
+
+ saved = 0;
+ /* Space for saved VFP registers. */
+ if (TARGET_HARD_FLOAT && TARGET_VFP)
+ {
+ count = 0;
+ for (regno = FIRST_VFP_REGNUM;
+ regno < LAST_VFP_REGNUM;
+ regno += 2)
+ {
+ if ((!regs_ever_live[regno] || call_used_regs[regno])
+ && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
+ {
+ if (count > 0)
+ {
+ /* Workaround ARM10 VFPr1 bug. */
+ if (count == 2 && !arm_arch6)
+ count++;
+ saved += count * 8 + 4;
+ }
+ count = 0;
+ }
+ else
+ count++;
+ }
+ if (count > 0)
+ {
+ if (count == 2 && !arm_arch6)
+ count++;
+ saved += count * 8 + 4;
+ }
+ }
+ return saved;
+ }
+
+
/* Generate a function exit sequence. If REALLY_RETURN is false, then do
everything bar the final return instruction. */
const char *
*************** arm_output_epilogue (rtx sibling)
*** 9306,9339 ****
if (TARGET_HARD_FLOAT && TARGET_VFP)
{
! int nregs = 0;
! /* We save regs in pairs. */
! /* A special insn for saving/restoring VFP registers. This does
! not have base+offset addressing modes, so we use IP to
! hold the address. Each block requires nregs*2+1 words. */
! start_reg = FIRST_VFP_REGNUM;
! /* Count how many blocks of registers need saving. */
! for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
! {
! if ((!regs_ever_live[reg] || call_used_regs[reg])
! && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
! {
! if (start_reg != reg)
! floats_offset += 4;
! start_reg = reg + 2;
! }
! else
! {
! floats_offset += 8;
! nregs++;
! }
! }
! if (start_reg != reg)
! floats_offset += 4;
! if (nregs > 0)
{
asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
FP_REGNUM, floats_offset - vfp_offset);
}
--- 9369,9383 ----
if (TARGET_HARD_FLOAT && TARGET_VFP)
{
! int saved_size;
! /* The fldmx insn does not have base+offset addressing modes,
! so we use IP to hold the address. */
! saved_size = arm_get_vfp_saved_size ();
! if (saved_size > 0)
{
+ floats_offset += saved_size;
asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
FP_REGNUM, floats_offset - vfp_offset);
}
*************** arm_output_epilogue (rtx sibling)
*** 9344,9363 ****
&& (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
{
if (start_reg != reg)
! {
! vfp_print_multi (f, "fldmfdx\t%r!", IP_REGNUM, "d%d",
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
! }
start_reg = reg + 2;
}
}
if (start_reg != reg)
! {
! vfp_print_multi (f, "fldmfdx\t%r!", IP_REGNUM, "d%d",
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
! }
}
if (TARGET_IWMMXT)
--- 9388,9403 ----
&& (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
{
if (start_reg != reg)
! arm_output_fldmx (f, IP_REGNUM,
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
start_reg = reg + 2;
}
}
if (start_reg != reg)
! arm_output_fldmx (f, IP_REGNUM,
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
}
if (TARGET_IWMMXT)
*************** arm_output_epilogue (rtx sibling)
*** 9478,9497 ****
&& (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
{
if (start_reg != reg)
! {
! vfp_print_multi (f, "fldmfdx\t%r!", SP_REGNUM, "d%d",
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
! }
start_reg = reg + 2;
}
}
if (start_reg != reg)
! {
! vfp_print_multi (f, "fldmfdx\t%r!", SP_REGNUM, "d%d",
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
! }
}
if (TARGET_IWMMXT)
for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
--- 9518,9533 ----
&& (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
{
if (start_reg != reg)
! arm_output_fldmx (f, SP_REGNUM,
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
start_reg = reg + 2;
}
}
if (start_reg != reg)
! arm_output_fldmx (f, SP_REGNUM,
! (start_reg - FIRST_VFP_REGNUM) / 2,
! (reg - start_reg) / 2);
}
if (TARGET_IWMMXT)
for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
*************** arm_get_frame_offsets (void)
*** 9855,9861 ****
struct arm_stack_offsets *offsets;
unsigned long func_type;
int leaf;
- bool new_block;
int saved;
HOST_WIDE_INT frame_size;
--- 9891,9896 ----
*************** arm_get_frame_offsets (void)
*** 9915,9941 ****
/* Space for saved VFP registers. */
if (TARGET_HARD_FLOAT && TARGET_VFP)
! {
! new_block = TRUE;
! for (regno = FIRST_VFP_REGNUM;
! regno < LAST_VFP_REGNUM;
! regno += 2)
! {
! if ((regs_ever_live[regno] && !call_used_regs[regno])
! || (regs_ever_live[regno + 1]
! && !call_used_regs[regno + 1]))
! {
! if (new_block)
! {
! saved += 4;
! new_block = FALSE;
! }
! saved += 8;
! }
! else
! new_block = TRUE;
! }
! }
}
}
else /* TARGET_THUMB */
--- 9950,9956 ----
/* Space for saved VFP registers. */
if (TARGET_HARD_FLOAT && TARGET_VFP)
! saved += arm_get_vfp_saved_size ();
}
}
else /* TARGET_THUMB */
*************** arm_expand_prologue (void)
*** 10317,10338 ****
&& (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
{
if (start_reg != reg)
! {
! insn = vfp_emit_fstmx (start_reg,
! (reg - start_reg) / 2);
! RTX_FRAME_RELATED_P (insn) = 1;
! saved_regs += (start_reg - reg) * 4 + 4;
! }
start_reg = reg + 2;
}
}
if (start_reg != reg)
! {
! insn = vfp_emit_fstmx (start_reg,
! (reg - start_reg) / 2);
! RTX_FRAME_RELATED_P (insn) = 1;
! saved_regs += (start_reg - reg) * 4 + 4;
! }
}
}
--- 10332,10345 ----
&& (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
{
if (start_reg != reg)
! saved_regs += vfp_emit_fstmx (start_reg,
! (reg - start_reg) / 2);
start_reg = reg + 2;
}
}
if (start_reg != reg)
! saved_regs += vfp_emit_fstmx (start_reg,
! (reg - start_reg) / 2);
}
}
More information about the Gcc-patches
mailing list