This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [RFC - ARM] - Fix PR43440 - Fix Neon inline asm register aliasing issues.
- From: Richard Earnshaw <rearnsha at arm dot com>
- To: ramana dot radhakrishnan at arm dot com
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 22 Mar 2010 14:34:44 +0000
- Subject: Re: [RFC - ARM] - Fix PR43440 - Fix Neon inline asm register aliasing issues.
- References: <alpine.DEB.2.00.1003200039160.9269@liliput>
On Sat, 2010-03-20 at 14:17 +0000, Ramana Radhakrishnan wrote:
> Hi,
>
> This is a bug that has come up a couple of times in the past few months
> and is one of the issues with writing inline assembler for Neon as
> described in the audit trail.
>
> This patch has the unhappy side effect of clobbering s0, s1 and s2
> if s3 is used because that's the only way we can indicate that q0 is
> clobbered by the write to s0. Unless we generate the modes for the
> register in the RTL clobber list or figure out a different way of
> representing this aliasing in the backend both of which I think are
> slightly invasive for stage4, we'll probably have to live with this. If
> anyone can think of a better and less invasive way of doing this, I'm all
> ears.
>
> I've added a testcase for this and will run this through a full
> regression test on arm-eabi for cortex-a8. With some other simple
> testcases that I've tried that exercises quite a bit of the code,
> I haven't seen any issues show up.
>
> Comments if any are welcome.
>
> Cheers
> Ramana
>
> * config/arm/arm.c (TARGET_MD_ASM_CLOBBERS): Define.
> (arm_get_clobbers_for_sregs): Likewise.
> (arm_get_clobbers_for_dregs): Likewise.
> (arm_get_clobbers_for_qregs): Likewise.
> (arm_md_asm_clobbers): Likewise.
>
> * gcc.target/arm/pr43440.c: New testcase.
>
Having discussed this with Ramana, I think there's a better way of doing
this which is easier to implement on other machines if needed as well.
It involves defining a new macro[1] that allows us do describe precisely
how many machine registers an alias clobbers.
* tm.texi (OVERLAPPING_REGISTER_NAMES): Document new macro.
* output.h (decode_reg_name_and_count): Declare.
* varasm.c (decode_reg_name_and_count): New function.
(decode_reg_name): Reimplement using decode_reg_name_and_count.
* reginfo.c (fix_register): Use decode_reg_name_and_count and
iterate over all regs used.
* stmt.c (expand_asm_operands): Likewise.
* arm/aout.h (OVERLAPPING_REGISTER_NAMES): Define.
(ADDITIONAL_REGISTER_NAMES): Remove aliases that overlap
multiple machine registers.
Comments please? OK to apply?
R.
[1] well, we could get away with changing the definition of
ADDITIONAL_REGISTER_NAMES, but I'd rather not as it would be somewhat of
a testing nightmare at this point in time.
*** config/arm/aout.h (revision 157592)
--- config/arm/aout.h (local)
***************
*** 163,193 ****
{"mvdx12", 39}, \
{"mvdx13", 40}, \
{"mvdx14", 41}, \
! {"mvdx15", 42}, \
! {"d0", 63}, {"q0", 63}, \
! {"d1", 65}, \
! {"d2", 67}, {"q1", 67}, \
! {"d3", 69}, \
! {"d4", 71}, {"q2", 71}, \
! {"d5", 73}, \
! {"d6", 75}, {"q3", 75}, \
! {"d7", 77}, \
! {"d8", 79}, {"q4", 79}, \
! {"d9", 81}, \
! {"d10", 83}, {"q5", 83}, \
! {"d11", 85}, \
! {"d12", 87}, {"q6", 87}, \
! {"d13", 89}, \
! {"d14", 91}, {"q7", 91}, \
! {"d15", 93}, \
! {"q8", 95}, \
! {"q9", 99}, \
! {"q10", 103}, \
! {"q11", 107}, \
! {"q12", 111}, \
! {"q13", 115}, \
! {"q14", 119}, \
! {"q15", 123} \
}
#endif
--- 163,207 ----
{"mvdx12", 39}, \
{"mvdx13", 40}, \
{"mvdx14", 41}, \
! {"mvdx15", 42} \
! }
! #endif
!
! #ifndef OVERLAPPING_REGISTER_NAMES
! #define OVERLAPPING_REGISTER_NAMES \
! { \
! {"d0", 63, 2}, \
! {"d1", 65, 2}, \
! {"d2", 67, 2}, \
! {"d3", 69, 2}, \
! {"d4", 71, 2}, \
! {"d5", 73, 2}, \
! {"d6", 75, 2}, \
! {"d7", 77, 2}, \
! {"d8", 79, 2}, \
! {"d9", 81, 2}, \
! {"d10", 83, 2}, \
! {"d11", 85, 2}, \
! {"d12", 87, 2}, \
! {"d13", 89, 2}, \
! {"d14", 91, 2}, \
! {"d15", 93, 2}, \
! {"q0", 63, 4}, \
! {"q1", 67, 4}, \
! {"q2", 71, 4}, \
! {"q3", 75, 4}, \
! {"q4", 79, 4}, \
! {"q5", 83, 4}, \
! {"q6", 87, 4}, \
! {"q7", 91, 4}, \
! {"q8", 95, 4}, \
! {"q9", 99, 4}, \
! {"q10", 103, 4}, \
! {"q11", 107, 4}, \
! {"q12", 111, 4}, \
! {"q13", 115, 4}, \
! {"q14", 119, 4}, \
! {"q15", 123, 4} \
}
#endif
*** output.h (revision 157592)
--- output.h (local)
*************** extern void emutls_finish (void);
*** 173,178 ****
--- 173,183 ----
Prefixes such as % are optional. */
extern int decode_reg_name (const char *);
+ /* Similar to decode_reg_name, but takes an extra parameter that is a
+ pointer to the number of (internal) registers described by the
+ external name. */
+ extern int decode_reg_name_and_count (const char *, int *);
+
extern void assemble_alias (tree, tree);
extern void default_assemble_visibility (tree, int);
*** reginfo.c (revision 157592)
--- reginfo.c (local)
*************** void
*** 797,832 ****
fix_register (const char *name, int fixed, int call_used)
{
int i;
/* Decode the name and update the primary form of
the register info. */
! if ((i = decode_reg_name (name)) >= 0)
{
! if ((i == STACK_POINTER_REGNUM
#ifdef HARD_FRAME_POINTER_REGNUM
! || i == HARD_FRAME_POINTER_REGNUM
#else
! || i == FRAME_POINTER_REGNUM
#endif
! )
! && (fixed == 0 || call_used == 0))
! {
! static const char * const what_option[2][2] = {
! { "call-saved", "call-used" },
! { "no-such-option", "fixed" }};
!
! error ("can't use '%s' as a %s register", name,
! what_option[fixed][call_used]);
! }
! else
! {
! fixed_regs[i] = fixed;
! call_used_regs[i] = call_used;
#ifdef CALL_REALLY_USED_REGISTERS
! if (fixed == 0)
! call_really_used_regs[i] = call_used;
#endif
}
}
else
--- 797,837 ----
fix_register (const char *name, int fixed, int call_used)
{
int i;
+ int reg, nregs;
/* Decode the name and update the primary form of
the register info. */
! if ((reg = decode_reg_name_and_count (name, &nregs)) >= 0)
{
! gcc_assert (nregs >= 1);
! for (i = reg; i < reg + nregs; i++)
! {
! if ((i == STACK_POINTER_REGNUM
#ifdef HARD_FRAME_POINTER_REGNUM
! || i == HARD_FRAME_POINTER_REGNUM
#else
! || i == FRAME_POINTER_REGNUM
#endif
! )
! && (fixed == 0 || call_used == 0))
! {
! static const char * const what_option[2][2] = {
! { "call-saved", "call-used" },
! { "no-such-option", "fixed" }};
!
! error ("can't use '%s' as a %s register", name,
! what_option[fixed][call_used]);
! }
! else
! {
! fixed_regs[i] = fixed;
! call_used_regs[i] = call_used;
#ifdef CALL_REALLY_USED_REGISTERS
! if (fixed == 0)
! call_really_used_regs[i] = call_used;
#endif
+ }
}
}
else
*** stmt.c (revision 157592)
--- stmt.c (local)
*************** expand_asm_operands (tree string, tree o
*** 684,696 ****
for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
{
const char *regname;
if (TREE_VALUE (tail) == error_mark_node)
return;
regname = TREE_STRING_POINTER (TREE_VALUE (tail));
! i = decode_reg_name (regname);
! if (i >= 0 || i == -4)
++nclobbers;
else if (i == -2)
error ("unknown register name %qs in %<asm%>", regname);
--- 684,697 ----
for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
{
const char *regname;
+ int nregs;
if (TREE_VALUE (tail) == error_mark_node)
return;
regname = TREE_STRING_POINTER (TREE_VALUE (tail));
! i = decode_reg_name_and_count (regname, &nregs);
! if (i == -4)
++nclobbers;
else if (i == -2)
error ("unknown register name %qs in %<asm%>", regname);
*************** expand_asm_operands (tree string, tree o
*** 698,711 ****
/* Mark clobbered registers. */
if (i >= 0)
{
! /* Clobbering the PIC register is an error. */
! if (i == (int) PIC_OFFSET_TABLE_REGNUM)
{
! error ("PIC register %qs clobbered in %<asm%>", regname);
! return;
! }
! SET_HARD_REG_BIT (clobbered_regs, i);
}
}
--- 699,719 ----
/* Mark clobbered registers. */
if (i >= 0)
{
! int reg;
!
! for (reg = i; reg < i + nregs; reg++)
{
! ++nclobbers;
!
! /* Clobbering the PIC register is an error. */
! if (reg == (int) PIC_OFFSET_TABLE_REGNUM)
! {
! error ("PIC register clobbered by %qs in %<asm%>", regname);
! return;
! }
! SET_HARD_REG_BIT (clobbered_regs, reg);
! }
}
}
*************** expand_asm_operands (tree string, tree o
*** 1026,1032 ****
for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
{
const char *regname = TREE_STRING_POINTER (TREE_VALUE (tail));
! int j = decode_reg_name (regname);
rtx clobbered_reg;
if (j < 0)
--- 1034,1041 ----
for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
{
const char *regname = TREE_STRING_POINTER (TREE_VALUE (tail));
! int reg, nregs;
! int j = decode_reg_name_and_count (regname, &nregs);
rtx clobbered_reg;
if (j < 0)
*************** expand_asm_operands (tree string, tree o
*** 1048,1077 ****
continue;
}
! /* Use QImode since that's guaranteed to clobber just one reg. */
! clobbered_reg = gen_rtx_REG (QImode, j);
! /* Do sanity check for overlap between clobbers and respectively
! input and outputs that hasn't been handled. Such overlap
! should have been detected and reported above. */
! if (!clobber_conflict_found)
! {
! int opno;
!
! /* We test the old body (obody) contents to avoid tripping
! over the under-construction body. */
! for (opno = 0; opno < noutputs; opno++)
! if (reg_overlap_mentioned_p (clobbered_reg, output_rtx[opno]))
! internal_error ("asm clobber conflict with output operand");
!
! for (opno = 0; opno < ninputs - ninout; opno++)
! if (reg_overlap_mentioned_p (clobbered_reg,
! ASM_OPERANDS_INPUT (obody, opno)))
! internal_error ("asm clobber conflict with input operand");
}
-
- XVECEXP (body, 0, i++)
- = gen_rtx_CLOBBER (VOIDmode, clobbered_reg);
}
if (nlabels > 0)
--- 1057,1095 ----
continue;
}
! for (reg = j; reg < j + nregs; reg++)
! {
! /* Use QImode since that's guaranteed to clobber just
! * one reg. */
! clobbered_reg = gen_rtx_REG (QImode, reg);
!
! /* Do sanity check for overlap between clobbers and
! respectively input and outputs that hasn't been
! handled. Such overlap should have been detected and
! reported above. */
! if (!clobber_conflict_found)
! {
! int opno;
!
! /* We test the old body (obody) contents to avoid
! tripping over the under-construction body. */
! for (opno = 0; opno < noutputs; opno++)
! if (reg_overlap_mentioned_p (clobbered_reg,
! output_rtx[opno]))
! internal_error
! ("asm clobber conflict with output operand");
!
! for (opno = 0; opno < ninputs - ninout; opno++)
! if (reg_overlap_mentioned_p (clobbered_reg,
! ASM_OPERANDS_INPUT (obody,
! opno)))
! internal_error
! ("asm clobber conflict with input operand");
! }
! XVECEXP (body, 0, i++)
! = gen_rtx_CLOBBER (VOIDmode, clobbered_reg);
}
}
if (nlabels > 0)
*** varasm.c (revision 157592)
--- varasm.c (local)
*************** set_user_assembler_name (tree decl, cons
*** 1043,1050 ****
Prefixes such as % are optional. */
int
! decode_reg_name (const char *asmspec)
{
if (asmspec != 0)
{
int i;
--- 1043,1053 ----
Prefixes such as % are optional. */
int
! decode_reg_name_and_count (const char *asmspec, int *pnregs)
{
+ /* Presume just one register is clobbered. */
+ *pnregs = 1;
+
if (asmspec != 0)
{
int i;
*************** decode_reg_name (const char *asmspec)
*** 1070,1075 ****
--- 1073,1097 ----
&& ! strcmp (asmspec, strip_reg_name (reg_names[i])))
return i;
+ #ifdef OVERLAPPING_REGISTER_NAMES
+ {
+ static const struct
+ {
+ const char *const name;
+ const int number;
+ const int nregs;
+ } table[] = OVERLAPPING_REGISTER_NAMES;
+
+ for (i = 0; i < (int) ARRAY_SIZE (table); i++)
+ if (table[i].name[0]
+ && ! strcmp (asmspec, table[i].name))
+ {
+ *pnregs = table[i].nregs;
+ return table[i].number;
+ }
+ }
+ #endif /* OVERLAPPING_REGISTER_NAMES */
+
#ifdef ADDITIONAL_REGISTER_NAMES
{
static const struct { const char *const name; const int number; } table[]
*************** decode_reg_name (const char *asmspec)
*** 1093,1098 ****
--- 1115,1128 ----
return -1;
}
+
+ int
+ decode_reg_name (const char *name)
+ {
+ int count;
+ return decode_reg_name_and_count (name, &count);
+ }
+
/* Return true if DECL's initializer is suitable for a BSS section. */
*** doc/tm.texi (revision 157592)
--- doc/tm.texi (local)
*************** registers, thus allowing the @code{asm}
*** 8307,8312 ****
--- 8307,8328 ----
to registers using alternate names.
@end defmac
+ @defmac OVERLAPPING_REGISTER_NAMES
+ If defined, a C initializer for an array of structures containing a
+ name, a register number and a count of the number of consecutive
+ machine registers the name overlaps. This macro defines additional
+ names for hard registers, thus allowing the @code{asm} option in
+ declarations to refer to registers using alternate names. Unlike
+ @code{ADDITIONAL_REGISTER_NAMES}, this macro should be used when the
+ register name implies multiple underlying registers.
+
+ This macro should be used when it is important that a clobber in an
+ @code{asm} statement clobbers all the underlying values implied by the
+ register name. For example, on ARM, clobbering the double-precision
+ VFP register ``d0'' implies clobbering both single-precision registers
+ ``s0'' and ``s1''.
+ @end defmac
+
@defmac ASM_OUTPUT_OPCODE (@var{stream}, @var{ptr})
Define this macro if you are using an unusual assembler that
requires different names for the machine instructions.