This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC - ARM] - Fix PR43440 - Fix Neon inline asm register aliasing issues.


On Sat, 2010-03-20 at 14:17 +0000, Ramana Radhakrishnan wrote:
> Hi,
> 
> This is a bug that has come up a couple of times in the past few months 
> and is one of the issues with writing inline assembler for Neon as 
> described in the audit trail.
> 
> This patch has the unhappy side effect of clobbering s0, s1 and s2 
> if s3 is used because that's the only way we can indicate that q0 is 
> clobbered by the write to s0. Unless we generate the modes for the 
> register in the RTL clobber list  or figure out a different way of 
> representing this aliasing in the backend both of which I think are 
> slightly invasive for stage4, we'll probably have to live with this. If 
> anyone can think of a better and less invasive way of doing this, I'm all 
> ears.
> 
> I've added a testcase for this and will run this through a full 
> regression test on arm-eabi for cortex-a8. With some other simple 
> testcases that I've tried that exercises quite a bit of the code,
> I haven't seen any issues show up.
> 
> Comments if any are welcome.
> 
> Cheers
> Ramana
> 
>         * config/arm/arm.c (TARGET_MD_ASM_CLOBBERS): Define.
>          (arm_get_clobbers_for_sregs): Likewise.
>          (arm_get_clobbers_for_dregs): Likewise.
>          (arm_get_clobbers_for_qregs): Likewise.
>          (arm_md_asm_clobbers): Likewise.
> 
>         * gcc.target/arm/pr43440.c: New testcase.
> 


Having discussed this with Ramana, I think there's a better way of doing
this which is easier to implement on other machines if needed as well.
It involves defining a new macro[1] that allows us do describe precisely
how many machine registers an alias clobbers.


	* tm.texi (OVERLAPPING_REGISTER_NAMES): Document new macro.
	* output.h (decode_reg_name_and_count): Declare.
	* varasm.c (decode_reg_name_and_count): New function.
	(decode_reg_name): Reimplement using decode_reg_name_and_count.
	* reginfo.c (fix_register): Use decode_reg_name_and_count and 
	iterate over all regs used.
	* stmt.c (expand_asm_operands): Likewise.
	* arm/aout.h (OVERLAPPING_REGISTER_NAMES): Define.
	(ADDITIONAL_REGISTER_NAMES): Remove aliases that overlap
	multiple machine registers.

Comments please?  OK to apply?

R.

[1] well, we could get away with changing the definition of
ADDITIONAL_REGISTER_NAMES, but I'd rather not as it would be somewhat of
a testing nightmare at this point in time.
*** config/arm/aout.h	(revision 157592)
--- config/arm/aout.h	(local)
***************
*** 163,193 ****
    {"mvdx12", 39},				\
    {"mvdx13", 40},				\
    {"mvdx14", 41},				\
!   {"mvdx15", 42},				\
!   {"d0", 63}, {"q0", 63},			\
!   {"d1", 65},					\
!   {"d2", 67}, {"q1", 67},			\
!   {"d3", 69},					\
!   {"d4", 71}, {"q2", 71},			\
!   {"d5", 73},					\
!   {"d6", 75}, {"q3", 75},			\
!   {"d7", 77},					\
!   {"d8", 79}, {"q4", 79},			\
!   {"d9", 81},					\
!   {"d10", 83}, {"q5", 83},			\
!   {"d11", 85},					\
!   {"d12", 87}, {"q6", 87},			\
!   {"d13", 89},					\
!   {"d14", 91}, {"q7", 91},			\
!   {"d15", 93},					\
!   {"q8", 95},					\
!   {"q9", 99},					\
!   {"q10", 103},					\
!   {"q11", 107},					\
!   {"q12", 111},					\
!   {"q13", 115},					\
!   {"q14", 119},					\
!   {"q15", 123}					\
  }
  #endif
  
--- 163,207 ----
    {"mvdx12", 39},				\
    {"mvdx13", 40},				\
    {"mvdx14", 41},				\
!   {"mvdx15", 42}				\
! }
! #endif
! 
! #ifndef OVERLAPPING_REGISTER_NAMES
! #define OVERLAPPING_REGISTER_NAMES		\
! {						\
!   {"d0", 63, 2},				\
!   {"d1", 65, 2},				\
!   {"d2", 67, 2},				\
!   {"d3", 69, 2},				\
!   {"d4", 71, 2},				\
!   {"d5", 73, 2},				\
!   {"d6", 75, 2},				\
!   {"d7", 77, 2},				\
!   {"d8", 79, 2},				\
!   {"d9", 81, 2},				\
!   {"d10", 83, 2},				\
!   {"d11", 85, 2},				\
!   {"d12", 87, 2},				\
!   {"d13", 89, 2},				\
!   {"d14", 91, 2},				\
!   {"d15", 93, 2},				\
!   {"q0", 63, 4},				\
!   {"q1", 67, 4},				\
!   {"q2", 71, 4},				\
!   {"q3", 75, 4},				\
!   {"q4", 79, 4},				\
!   {"q5", 83, 4},				\
!   {"q6", 87, 4},				\
!   {"q7", 91, 4},				\
!   {"q8", 95, 4},				\
!   {"q9", 99, 4},				\
!   {"q10", 103, 4},				\
!   {"q11", 107, 4},				\
!   {"q12", 111, 4},				\
!   {"q13", 115, 4},				\
!   {"q14", 119, 4},				\
!   {"q15", 123, 4}				\
  }
  #endif
  
*** output.h	(revision 157592)
--- output.h	(local)
*************** extern void emutls_finish (void);
*** 173,178 ****
--- 173,183 ----
     Prefixes such as % are optional.  */
  extern int decode_reg_name (const char *);
  
+ /* Similar to decode_reg_name, but takes an extra parameter that is a
+    pointer to the number of (internal) registers described by the
+    external name.  */
+ extern int decode_reg_name_and_count (const char *, int *);
+ 
  extern void assemble_alias (tree, tree);
  
  extern void default_assemble_visibility (tree, int);
*** reginfo.c	(revision 157592)
--- reginfo.c	(local)
*************** void
*** 797,832 ****
  fix_register (const char *name, int fixed, int call_used)
  {
    int i;
  
    /* Decode the name and update the primary form of
       the register info.  */
  
!   if ((i = decode_reg_name (name)) >= 0)
      {
!       if ((i == STACK_POINTER_REGNUM
  #ifdef HARD_FRAME_POINTER_REGNUM
! 	   || i == HARD_FRAME_POINTER_REGNUM
  #else
! 	   || i == FRAME_POINTER_REGNUM
  #endif
! 	   )
! 	  && (fixed == 0 || call_used == 0))
! 	{
! 	  static const char * const what_option[2][2] = {
! 	    { "call-saved", "call-used" },
! 	    { "no-such-option", "fixed" }};
! 
! 	  error ("can't use '%s' as a %s register", name,
! 		 what_option[fixed][call_used]);
! 	}
!       else
! 	{
! 	  fixed_regs[i] = fixed;
! 	  call_used_regs[i] = call_used;
  #ifdef CALL_REALLY_USED_REGISTERS
! 	  if (fixed == 0)
! 	    call_really_used_regs[i] = call_used;
  #endif
  	}
      }
    else
--- 797,837 ----
  fix_register (const char *name, int fixed, int call_used)
  {
    int i;
+   int reg, nregs;
  
    /* Decode the name and update the primary form of
       the register info.  */
  
!   if ((reg = decode_reg_name_and_count (name, &nregs)) >= 0)
      {
!       gcc_assert (nregs >= 1);
!       for (i = reg; i < reg + nregs; i++)
! 	{
! 	  if ((i == STACK_POINTER_REGNUM
  #ifdef HARD_FRAME_POINTER_REGNUM
! 	       || i == HARD_FRAME_POINTER_REGNUM
  #else
! 	       || i == FRAME_POINTER_REGNUM
  #endif
! 	       )
! 	      && (fixed == 0 || call_used == 0))
! 	    {
! 	      static const char * const what_option[2][2] = {
! 		{ "call-saved", "call-used" },
! 		{ "no-such-option", "fixed" }};
! 
! 	      error ("can't use '%s' as a %s register", name,
! 		     what_option[fixed][call_used]);
! 	    }
! 	  else
! 	    {
! 	      fixed_regs[i] = fixed;
! 	      call_used_regs[i] = call_used;
  #ifdef CALL_REALLY_USED_REGISTERS
! 	      if (fixed == 0)
! 		call_really_used_regs[i] = call_used;
  #endif
+ 	    }
  	}
      }
    else
*** stmt.c	(revision 157592)
--- stmt.c	(local)
*************** expand_asm_operands (tree string, tree o
*** 684,696 ****
    for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
      {
        const char *regname;
  
        if (TREE_VALUE (tail) == error_mark_node)
  	return;
        regname = TREE_STRING_POINTER (TREE_VALUE (tail));
  
!       i = decode_reg_name (regname);
!       if (i >= 0 || i == -4)
  	++nclobbers;
        else if (i == -2)
  	error ("unknown register name %qs in %<asm%>", regname);
--- 684,697 ----
    for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
      {
        const char *regname;
+       int nregs;
  
        if (TREE_VALUE (tail) == error_mark_node)
  	return;
        regname = TREE_STRING_POINTER (TREE_VALUE (tail));
  
!       i = decode_reg_name_and_count (regname, &nregs);
!       if (i == -4)
  	++nclobbers;
        else if (i == -2)
  	error ("unknown register name %qs in %<asm%>", regname);
*************** expand_asm_operands (tree string, tree o
*** 698,711 ****
        /* Mark clobbered registers.  */
        if (i >= 0)
          {
! 	  /* Clobbering the PIC register is an error.  */
! 	  if (i == (int) PIC_OFFSET_TABLE_REGNUM)
  	    {
! 	      error ("PIC register %qs clobbered in %<asm%>", regname);
! 	      return;
! 	    }
  
! 	  SET_HARD_REG_BIT (clobbered_regs, i);
  	}
      }
  
--- 699,719 ----
        /* Mark clobbered registers.  */
        if (i >= 0)
          {
! 	  int reg;
! 
! 	  for (reg = i; reg < i + nregs; reg++)
  	    {
! 	      ++nclobbers;
! 
! 	      /* Clobbering the PIC register is an error.  */
! 	      if (reg == (int) PIC_OFFSET_TABLE_REGNUM)
! 		{
! 		  error ("PIC register clobbered by %qs in %<asm%>", regname);
! 		  return;
! 		}
  
! 	      SET_HARD_REG_BIT (clobbered_regs, reg);
! 	    }
  	}
      }
  
*************** expand_asm_operands (tree string, tree o
*** 1026,1032 ****
        for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
  	{
  	  const char *regname = TREE_STRING_POINTER (TREE_VALUE (tail));
! 	  int j = decode_reg_name (regname);
  	  rtx clobbered_reg;
  
  	  if (j < 0)
--- 1034,1041 ----
        for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
  	{
  	  const char *regname = TREE_STRING_POINTER (TREE_VALUE (tail));
! 	  int reg, nregs;
! 	  int j = decode_reg_name_and_count (regname, &nregs);
  	  rtx clobbered_reg;
  
  	  if (j < 0)
*************** expand_asm_operands (tree string, tree o
*** 1048,1077 ****
  	      continue;
  	    }
  
! 	  /* Use QImode since that's guaranteed to clobber just one reg.  */
! 	  clobbered_reg = gen_rtx_REG (QImode, j);
  
! 	  /* Do sanity check for overlap between clobbers and respectively
! 	     input and outputs that hasn't been handled.  Such overlap
! 	     should have been detected and reported above.  */
! 	  if (!clobber_conflict_found)
! 	    {
! 	      int opno;
! 
! 	      /* We test the old body (obody) contents to avoid tripping
! 		 over the under-construction body.  */
! 	      for (opno = 0; opno < noutputs; opno++)
! 		if (reg_overlap_mentioned_p (clobbered_reg, output_rtx[opno]))
! 		  internal_error ("asm clobber conflict with output operand");
! 
! 	      for (opno = 0; opno < ninputs - ninout; opno++)
! 		if (reg_overlap_mentioned_p (clobbered_reg,
! 					     ASM_OPERANDS_INPUT (obody, opno)))
! 		  internal_error ("asm clobber conflict with input operand");
  	    }
- 
- 	  XVECEXP (body, 0, i++)
- 	    = gen_rtx_CLOBBER (VOIDmode, clobbered_reg);
  	}
  
        if (nlabels > 0)
--- 1057,1095 ----
  	      continue;
  	    }
  
! 	  for (reg = j; reg < j + nregs; reg++)
! 	    {
! 	      /* Use QImode since that's guaranteed to clobber just
! 	       * one reg.  */
! 	      clobbered_reg = gen_rtx_REG (QImode, reg);
! 
! 	      /* Do sanity check for overlap between clobbers and
! 		 respectively input and outputs that hasn't been
! 		 handled.  Such overlap should have been detected and
! 		 reported above.  */
! 	      if (!clobber_conflict_found)
! 		{
! 		  int opno;
! 
! 		  /* We test the old body (obody) contents to avoid
! 		     tripping over the under-construction body.  */
! 		  for (opno = 0; opno < noutputs; opno++)
! 		    if (reg_overlap_mentioned_p (clobbered_reg,
! 						 output_rtx[opno]))
! 		      internal_error
! 			("asm clobber conflict with output operand");
! 
! 		  for (opno = 0; opno < ninputs - ninout; opno++)
! 		    if (reg_overlap_mentioned_p (clobbered_reg,
! 						 ASM_OPERANDS_INPUT (obody,
! 								     opno)))
! 		      internal_error
! 			("asm clobber conflict with input operand");
! 		}
  
! 	      XVECEXP (body, 0, i++)
! 		= gen_rtx_CLOBBER (VOIDmode, clobbered_reg);
  	    }
  	}
  
        if (nlabels > 0)
*** varasm.c	(revision 157592)
--- varasm.c	(local)
*************** set_user_assembler_name (tree decl, cons
*** 1043,1050 ****
     Prefixes such as % are optional.  */
  
  int
! decode_reg_name (const char *asmspec)
  {
    if (asmspec != 0)
      {
        int i;
--- 1043,1053 ----
     Prefixes such as % are optional.  */
  
  int
! decode_reg_name_and_count (const char *asmspec, int *pnregs)
  {
+   /* Presume just one register is clobbered.  */
+   *pnregs = 1;
+ 
    if (asmspec != 0)
      {
        int i;
*************** decode_reg_name (const char *asmspec)
*** 1070,1075 ****
--- 1073,1097 ----
  	    && ! strcmp (asmspec, strip_reg_name (reg_names[i])))
  	  return i;
  
+ #ifdef OVERLAPPING_REGISTER_NAMES
+       {
+ 	static const struct
+ 	{
+ 	  const char *const name;
+ 	  const int number;
+ 	  const int nregs;
+ 	} table[] = OVERLAPPING_REGISTER_NAMES;
+ 
+ 	for (i = 0; i < (int) ARRAY_SIZE (table); i++)
+ 	  if (table[i].name[0]
+ 	      && ! strcmp (asmspec, table[i].name))
+ 	    {
+ 	      *pnregs = table[i].nregs;
+ 	      return table[i].number;
+ 	    }
+       }
+ #endif /* OVERLAPPING_REGISTER_NAMES */
+ 
  #ifdef ADDITIONAL_REGISTER_NAMES
        {
  	static const struct { const char *const name; const int number; } table[]
*************** decode_reg_name (const char *asmspec)
*** 1093,1098 ****
--- 1115,1128 ----
  
    return -1;
  }
+ 
+ int
+ decode_reg_name (const char *name)
+ {
+   int count;
+   return decode_reg_name_and_count (name, &count);
+ }
+ 
  
  /* Return true if DECL's initializer is suitable for a BSS section.  */
  
*** doc/tm.texi	(revision 157592)
--- doc/tm.texi	(local)
*************** registers, thus allowing the @code{asm} 
*** 8307,8312 ****
--- 8307,8328 ----
  to registers using alternate names.
  @end defmac
  
+ @defmac OVERLAPPING_REGISTER_NAMES
+ If defined, a C initializer for an array of structures containing a
+ name, a register number and a count of the number of consecutive
+ machine registers the name overlaps.  This macro defines additional
+ names for hard registers, thus allowing the @code{asm} option in
+ declarations to refer to registers using alternate names.  Unlike
+ @code{ADDITIONAL_REGISTER_NAMES}, this macro should be used when the
+ register name implies multiple underlying registers.
+ 
+ This macro should be used when it is important that a clobber in an
+ @code{asm} statement clobbers all the underlying values implied by the
+ register name.  For example, on ARM, clobbering the double-precision
+ VFP register ``d0'' implies clobbering both single-precision registers
+ ``s0'' and ``s1''.
+ @end defmac
+ 
  @defmac ASM_OUTPUT_OPCODE (@var{stream}, @var{ptr})
  Define this macro if you are using an unusual assembler that
  requires different names for the machine instructions.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]