[arm] Fix for VFP errata

Paul Brook paul@codesourcery.com
Thu Mar 25 00:13:00 GMT 2004


The following patch works around the ARM10 VFPr1 store-multiple erratum.

Ok?

Paul

2004-03-25  Paul Brook  <paul@codesourcery.com>

	* arm.c (vfp_print_multi): Remove.
	(arm_output_fldmx): New function.
	(vfp_emit_fstmx): Return block size, not insn. Add ARM10 VFPr1 bugfix.
	(arm_expand_prologue): Update to match.
	(arm_get_vfp_saved_size): New function.
	(arm_get_frame_offsets): Use it.
	(arm_output_epilogue): Use new functions.

Index: arm.c
===================================================================
RCS file: /var/cvsroot/gcc-cvs/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.340
diff -c -p -r1.340 arm.c
*** a/arm.c	24 Mar 2004 17:20:13 -0000	1.340
--- b/arm.c	25 Mar 2004 00:09:35 -0000
*************** print_multi_reg (FILE *stream, const cha
*** 7809,7837 ****
  }
  
  
! /* Output the operands of a FLDM/FSTM instruction to STREAM.
!    REG is the base register,
!    INSTR is the possibly suffixed load or store instruction.
!    FMT specifies now to print the register name.
!    START and COUNT specify the register range.  */
  
  static void
! vfp_print_multi (FILE *stream, const char *instr, int reg,
! 		 const char * fmt, int start, int count)
  {
    int i;
  
    fputc ('\t', stream);
!   asm_fprintf (stream, instr, reg);
!   fputs (", {", stream);
  
!   for (i = start; i < start + count; i++)
      {
!       if (i > start)
  	fputs (", ", stream);
!       asm_fprintf (stream, fmt, i);
      }
    fputs ("}\n", stream);
  }
  
  
--- 7809,7843 ----
  }
  
  
! /* Output a FLDMX instruction to STREAM.
!    BASE is the register containing the address.
!    REG and COUNT specify the register range.
!    Extra registers may be added to avoid hardware bugs.  */
  
  static void
! arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
  {
    int i;
  
+   /* Workaround ARM10 VFPr1 bug.  */
+   if (count == 2 && !arm_arch6)
+     {
+       if (reg == 15)
+ 	reg--;
+       count++;
+     }
+ 
    fputc ('\t', stream);
!   asm_fprintf (stream, "fldmfdx\t%r!, {", base);
  
!   for (i = reg; i < reg + count; i++)
      {
!       if (i > reg)
  	fputs (", ", stream);
!       asm_fprintf (stream, "d%d", i);
      }
    fputs ("}\n", stream);
+ 
  }
  
  
*************** vfp_output_fstmx (rtx * operands)
*** 7863,7871 ****
  }
  
  
! /* Emit RTL to save block of VFP register pairs to the stack.  */
  
! static rtx
  vfp_emit_fstmx (int base_reg, int count)
  {
    rtx par;
--- 7869,7878 ----
  }
  
  
! /* Emit RTL to save block of VFP register pairs to the stack.  Returns the
!    number of bytes pushed.  */
  
! static int
  vfp_emit_fstmx (int base_reg, int count)
  {
    rtx par;
*************** vfp_emit_fstmx (int base_reg, int count)
*** 7873,7878 ****
--- 7880,7895 ----
    rtx tmp, reg;
    int i;
  
+   /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
+      register pairs are stored by a store multiple insn.  We avoid this
+      by pushing an extra pair.  */
+   if (count == 2 && !arm_arch6)
+     {
+       if (base_reg == LAST_VFP_REGNUM - 3)
+ 	base_reg -= 2;
+       count++;
+     }
+ 
    /* ??? The frame layout is implementation defined.  We describe
       standard format 1 (equivalent to a FSTMD insn and unused pad word).
       We really need some way of representing the whole block so that the
*************** vfp_emit_fstmx (int base_reg, int count)
*** 7922,7928 ****
    par = emit_insn (par);
    REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
  				       REG_NOTES (par));
!   return par;
  }
  
  
--- 7939,7947 ----
    par = emit_insn (par);
    REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
  				       REG_NOTES (par));
!   RTX_FRAME_RELATED_P (par) = 1;
! 
!   return count * 8 + 4;
  }
  
  
*************** arm_compute_save_reg_mask (void)
*** 8864,8869 ****
--- 8883,8932 ----
    return save_reg_mask;
  }
  
+ 
+ /* Return the number of bytes required to save VFP registers.  */
+ static int
+ arm_get_vfp_saved_size (void)
+ {
+   unsigned int regno;
+   int count;
+   int saved;
+ 
+   saved = 0;
+   /* Space for saved VFP registers.  */
+   if (TARGET_HARD_FLOAT && TARGET_VFP)
+     {
+       count = 0;
+       for (regno = FIRST_VFP_REGNUM;
+ 	   regno < LAST_VFP_REGNUM;
+ 	   regno += 2)
+ 	{
+ 	  if ((!regs_ever_live[regno] || call_used_regs[regno])
+ 	      && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
+ 	    {
+ 	      if (count > 0)
+ 		{
+ 		  /* Workaround ARM10 VFPr1 bug.  */
+ 		  if (count == 2 && !arm_arch6)
+ 		    count++;
+ 		  saved += count * 8 + 4;
+ 		}
+ 	      count = 0;
+ 	    }
+ 	  else
+ 	    count++;
+ 	}
+       if (count > 0)
+ 	{
+ 	  if (count == 2 && !arm_arch6)
+ 	    count++;
+ 	  saved += count * 8 + 4;
+ 	}
+     }
+   return saved;
+ }
+ 
+ 
  /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
     everything bar the final return instruction.  */
  const char *
*************** arm_output_epilogue (rtx sibling)
*** 9306,9339 ****
  
        if (TARGET_HARD_FLOAT && TARGET_VFP)
  	{
! 	  int nregs = 0;
  
! 	  /* We save regs in pairs.  */
! 	  /* A special insn for saving/restoring VFP registers.  This does
! 	     not have base+offset addressing modes, so we use IP to
! 	     hold the address.  Each block requires nregs*2+1 words.  */
! 	  start_reg = FIRST_VFP_REGNUM;
! 	  /* Count how many blocks of registers need saving.  */
! 	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
! 	    {
! 	      if ((!regs_ever_live[reg] || call_used_regs[reg])
! 		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
! 		{
! 		  if (start_reg != reg)
! 		    floats_offset += 4;
! 		  start_reg = reg + 2;
! 		}
! 	      else
! 		{
! 		  floats_offset += 8;
! 		  nregs++;
! 		}
! 	    }
! 	  if (start_reg != reg)
! 	    floats_offset += 4;
  
! 	  if (nregs > 0)
  	    {
  	      asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
  			   FP_REGNUM, floats_offset - vfp_offset);
  	    }
--- 9369,9383 ----
  
        if (TARGET_HARD_FLOAT && TARGET_VFP)
  	{
! 	  int saved_size;
  
! 	  /* The fldmx insn does not have base+offset addressing modes,
! 	     so we use IP to hold the address.  */
! 	  saved_size = arm_get_vfp_saved_size ();
  
! 	  if (saved_size > 0)
  	    {
+ 	      floats_offset += saved_size;
  	      asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
  			   FP_REGNUM, floats_offset - vfp_offset);
  	    }
*************** arm_output_epilogue (rtx sibling)
*** 9344,9363 ****
  		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
  		{
  		  if (start_reg != reg)
! 		    {
! 		      vfp_print_multi (f, "fldmfdx\t%r!", IP_REGNUM, "d%d",
! 				       (start_reg - FIRST_VFP_REGNUM) / 2,
! 				       (reg - start_reg) / 2);
! 		    }
  		  start_reg = reg + 2;
  		}
  	    }
  	  if (start_reg != reg)
! 	    {
! 	      vfp_print_multi (f, "fldmfdx\t%r!", IP_REGNUM, "d%d",
! 			       (start_reg - FIRST_VFP_REGNUM) / 2,
! 			       (reg - start_reg) / 2);
! 	    }
  	}
  
        if (TARGET_IWMMXT)
--- 9388,9403 ----
  		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
  		{
  		  if (start_reg != reg)
! 		    arm_output_fldmx (f, IP_REGNUM,
! 				      (start_reg - FIRST_VFP_REGNUM) / 2,
! 				      (reg - start_reg) / 2);
  		  start_reg = reg + 2;
  		}
  	    }
  	  if (start_reg != reg)
! 	    arm_output_fldmx (f, IP_REGNUM,
! 			      (start_reg - FIRST_VFP_REGNUM) / 2,
! 			      (reg - start_reg) / 2);
  	}
  
        if (TARGET_IWMMXT)
*************** arm_output_epilogue (rtx sibling)
*** 9478,9497 ****
  		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
  		{
  		  if (start_reg != reg)
! 		    {
! 		      vfp_print_multi (f, "fldmfdx\t%r!", SP_REGNUM, "d%d",
! 				       (start_reg - FIRST_VFP_REGNUM) / 2,
! 				       (reg - start_reg) / 2);
! 		    }
  		  start_reg = reg + 2;
  		}
  	    }
  	  if (start_reg != reg)
! 	    {
! 	      vfp_print_multi (f, "fldmfdx\t%r!", SP_REGNUM, "d%d",
! 			       (start_reg - FIRST_VFP_REGNUM) / 2,
! 			       (reg - start_reg) / 2);
! 	    }
  	}
        if (TARGET_IWMMXT)
  	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
--- 9518,9533 ----
  		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
  		{
  		  if (start_reg != reg)
! 		    arm_output_fldmx (f, SP_REGNUM,
! 				      (start_reg - FIRST_VFP_REGNUM) / 2,
! 				      (reg - start_reg) / 2);
  		  start_reg = reg + 2;
  		}
  	    }
  	  if (start_reg != reg)
! 	    arm_output_fldmx (f, SP_REGNUM,
! 			      (start_reg - FIRST_VFP_REGNUM) / 2,
! 			      (reg - start_reg) / 2);
  	}
        if (TARGET_IWMMXT)
  	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
*************** arm_get_frame_offsets (void)
*** 9855,9861 ****
    struct arm_stack_offsets *offsets;
    unsigned long func_type;
    int leaf;
-   bool new_block;
    int saved;
    HOST_WIDE_INT frame_size;
  
--- 9891,9896 ----
*************** arm_get_frame_offsets (void)
*** 9915,9941 ****
  
  	  /* Space for saved VFP registers.  */
  	  if (TARGET_HARD_FLOAT && TARGET_VFP)
! 	    {
! 	      new_block = TRUE;
! 	      for (regno = FIRST_VFP_REGNUM;
! 		   regno < LAST_VFP_REGNUM;
! 		   regno += 2)
! 		{
! 		  if ((regs_ever_live[regno] && !call_used_regs[regno])
! 		      || (regs_ever_live[regno + 1]
! 			  && !call_used_regs[regno + 1]))
! 		    {
! 		      if (new_block)
! 			{
! 			  saved += 4;
! 			  new_block = FALSE;
! 			}
! 		      saved += 8;
! 		    }
! 		  else
! 		    new_block = TRUE;
! 		}
! 	    }
  	}
      }
    else /* TARGET_THUMB */
--- 9950,9956 ----
  
  	  /* Space for saved VFP registers.  */
  	  if (TARGET_HARD_FLOAT && TARGET_VFP)
! 	    saved += arm_get_vfp_saved_size ();
  	}
      }
    else /* TARGET_THUMB */
*************** arm_expand_prologue (void)
*** 10317,10338 ****
  		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
  		{
  		  if (start_reg != reg)
! 		    {
! 		      insn = vfp_emit_fstmx (start_reg,
! 					    (reg - start_reg) / 2);
! 		      RTX_FRAME_RELATED_P (insn) = 1;
! 		      saved_regs += (start_reg - reg) * 4 + 4;
! 		    }
  		  start_reg = reg + 2;
  		}
  	    }
  	  if (start_reg != reg)
! 	    {
! 	      insn = vfp_emit_fstmx (start_reg,
! 				    (reg - start_reg) / 2);
! 	      RTX_FRAME_RELATED_P (insn) = 1;
! 	      saved_regs += (start_reg - reg) * 4 + 4;
! 	    }
  	}
      }
  
--- 10332,10345 ----
  		  && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
  		{
  		  if (start_reg != reg)
! 		    saved_regs += vfp_emit_fstmx (start_reg,
! 						  (reg - start_reg) / 2);
  		  start_reg = reg + 2;
  		}
  	    }
  	  if (start_reg != reg)
! 	    saved_regs += vfp_emit_fstmx (start_reg,
! 					  (reg - start_reg) / 2);
  	}
      }
  



More information about the Gcc-patches mailing list