This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch] PR33552, match_asm_constraints failures and miscompiles
- From: Michael Matz <matz at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 27 Sep 2007 16:48:30 +0200 (CEST)
- Subject: [patch] PR33552, match_asm_constraints failures and miscompiles
Hi,
this patch fixes two bugs and changes another thing:
1) 4.3 can't compile glibc anymore on x86 with -O2 -fPIC, as
string-inlines.c gets reload failures because match_asm_constraints
generates a situation with one additional input operand, which reload
can't handle anymore.
2) PR 33552 is about a miscompile of gmp, also caused by
match_asm_constraints, which doesn't check the validity of the
transform it does, in particular it overwrites an output operand
without checking that it isn't used also as input.
3) We now restrict ourselves to only doing the transformation on REG_P
operands, based on an older discussion. We could also restrict ourselves to
!MEM_P or to REG_P and subregs of REG_P, but the situation which this
pass is trying to solve in practice only seems to be with REG_P. This
also lets us avoid using copy_rtx.
A former version of this patch was bootstrapped and regtested on
ia64,ppc,x86_64 and i686. I've restarted the process with this patch.
There are two testcases, one from Jakub's comment in PR33552, and one extracted
from glibc.
Ciao,
Michael.
PR rtl-optimization/33552
* function.c (match_asm_constraints_1): Check for overlap in
inputs and replace all occurrences.
gcc.target/i386/pr33552.c: New runtime test.
gcc.target/i386/strinline.c: New compile time test.
Index: gcc/function.c
===================================================================
--- gcc/function.c (revision 128829)
+++ gcc/function.c (working copy)
@@ -5663,7 +5663,7 @@ match_asm_constraints_1 (rtx insn, rtx *
rtx input, output, insns;
const char *constraint = ASM_OPERANDS_INPUT_CONSTRAINT (op, i);
char *end;
- int match;
+ int match, j;
match = strtoul (constraint, &end, 10);
if (end == constraint)
@@ -5672,18 +5672,59 @@ match_asm_constraints_1 (rtx insn, rtx *
gcc_assert (match < noutputs);
output = SET_DEST (p_sets[match]);
input = RTVEC_ELT (inputs, i);
- if (rtx_equal_p (output, input)
+ /* Only do the transformation for pseudos. */
+ if (! REG_P (output)
+ || rtx_equal_p (output, input)
|| (GET_MODE (input) != VOIDmode
&& GET_MODE (input) != GET_MODE (output)))
continue;
+ /* We can't do anything if the output is also used as input,
+ as we're going to overwrite it. */
+ for (j = 0; j < ninputs; j++)
+ if (reg_overlap_mentioned_p (output, RTVEC_ELT (inputs, j)))
+ break;
+ if (j != ninputs)
+ continue;
+
start_sequence ();
- emit_move_insn (copy_rtx (output), input);
- RTVEC_ELT (inputs, i) = copy_rtx (output);
+ emit_move_insn (output, input);
insns = get_insns ();
end_sequence ();
-
emit_insn_before (insns, insn);
+
+ /* Now replace all mentions of the input with output. We can't
+ just replace the occurence in inputs[i], as the register might
+ also be used in some other input (or even in an address of an
+ output), which would mean possibly increasing the number of
+ inputs by one (namely 'output' in addition), which might pose
+ a too complicated problem for reload to solve. E.g. this situation:
+
+ asm ("" : "=r" (output), "=m" (input) : "0" (input))
+
+ Here 'input' is used in two occurences as input (once for the
+ input operand, once for the address in the second output operand).
+ If we would replace only the occurence of the input operand (to
+ make the matching) we would be left with this:
+
+ output = input
+ asm ("" : "=r" (output), "=m" (input) : "0" (output))
+
+ Now we suddenly have two different input values (containing the same
+ value, but different pseudos) where we formerly had only one.
+ With more complicated asms this might lead to reload failures
+ which wouldn't have happen without this pass. So, iterate over
+ all operands and replace all occurences of the register used. */
+ for (j = 0; j < noutputs; j++)
+ if (!rtx_equal_p (SET_DEST (p_sets[j]), output)
+ && reg_overlap_mentioned_p (input, SET_DEST (p_sets[j])))
+ SET_DEST (p_sets[j]) = replace_rtx (SET_DEST (p_sets[j]),
+ input, output);
+ for (j = 0; j < ninputs; j++)
+ if (reg_overlap_mentioned_p (input, RTVEC_ELT (inputs, j)))
+ RTVEC_ELT (inputs, j) = replace_rtx (RTVEC_ELT (inputs, j),
+ input, output);
+
changed = true;
}
/* PR rtl-optimization/33552 */
/* { dg-do run } */
/* { dg-options "-O2" } */
extern void abort (void);
/* Runtime reproducer for PR rtl-optimization/33552.

   Runs UN iterations.  Each iteration reads one word from UP, routes it
   (together with VP[0]) through two asm statements that use matching
   constraints, and stores one word to WP.  Only VP[0] is read from VP.

   NOTE(review): noinline is presumably there so the asm operands are not
   simplified away by inlining into the caller -- confirm.  */
void
__attribute__((noinline))
foo (unsigned long *wp, unsigned long *up, long un, unsigned long *vp)
{
long j;
unsigned long prod_low, prod_high;
unsigned long cy_dig;
unsigned long v_limb;
v_limb = vp[0];
/* Initial running value (64 == 0x40); the second asm below stores the
   value held *before* the update, so this is the first word written.  */
cy_dig = 64;
for (j = un; j > 0; j--)
{
unsigned long u_limb, w_limb;
u_limb = *up++;
/* Empty template: the matching constraints alone tie prod_low to
   u_limb ("0") and prod_high to v_limb ("1"), so the outputs simply
   take over the input values.  */
__asm__ (""
: "=r" (prod_low), "=r" (prod_high)
: "0" (u_limb), "1" (v_limb));
/* Operands: %0 = cy_dig (output, matched with input prod_high),
   %1 = w_limb (early-clobber output, matched with input prod_low),
   %3 = constant 0 (not referenced by the template),
   %5 = current cy_dig (input).
   Read in AT&T source,destination order the template computes
   w_limb = old cy_dig;  cy_dig = prod_high + old cy_dig.
   Note that cy_dig is both output operand 0 and input operand 5, i.e.
   an output that is also used as an input of the same asm.  */
__asm__ ("mov %5, %1; add %5, %0"
: "=r" (cy_dig), "=&r" (w_limb)
: "0" (prod_high), "rm" (0), "1" (prod_low), "rm" (cy_dig));
*wp++ = w_limb;
}
}
/* Driver for the runtime test: feeds four input words and the value
   0xdead to foo, then aborts unless every word foo stored matches the
   expected sequence.  Returns 0 on success.  */
int
main (void)
{
  static const unsigned long expected[4]
    = { 0x40, 0xdeed, 0x1bd9a, 0x29c47 };
  unsigned long src[4] = { 0x1248, 0x248a, 0x1745, 0x1853 };
  unsigned long dst[4];
  unsigned long limb = 0xdead;
  int idx;

  foo (dst, src, 4, &limb);

  /* Any single mismatch is a failure, exactly like the original
     short-circuit chain of comparisons.  */
  for (idx = 0; idx < 4; idx++)
    {
      if (dst[idx] != expected[idx])
        abort ();
    }

  return 0;
}
/* { dg-do compile } */
/* { dg-options "-O2 -fPIC" } */
typedef unsigned int size_t;
/* Compile-only reproducer: an inline-asm copy routine whose operand list
   combines three matching constraints ("1", "2", "3") with "m" operands
   that are addressed through __dest and __src.

   Returns __tmp + 2, where __tmp is asm output operand 1 (initialised
   from __dest).

   NOTE(review): per the accompanying patch message this was extracted
   from glibc and used to trigger reload failures on x86 with
   -O2 -fPIC -- confirm against the PR.  */
char *
__mempcpy_by2 (char *__dest, __const char *__src, size_t __srclen)
{
register char *__tmp = __dest;
register unsigned long int __d0, __d1;
/* Operands: %0 = __d0 (early-clobber scratch, "q" register),
   %1 = __tmp (matched with input __tmp),
   %2 = __src (early-clobber, matched with input __src),
   %3 = __d1 (early-clobber, matched with input __srclen / 2),
   %4 = memory output covering __srclen bytes at __dest;
   the last input is the corresponding memory read at __src.
   Template as written: shift %3 right by one and skip the loop if the
   result is zero; the loop moves 4 bytes per pass from (%2) to (%1)
   through %0, advancing both pointers by 4 and decrementing %3; after
   the loop a final movw pair moves 2 more bytes.  "cc" is clobbered.  */
__asm__ __volatile__
("shrl $1,%3\n\t"
"jz 2f\n"
"1:\n\t"
"movl (%2),%0\n\t"
"leal 4(%2),%2\n\t"
"movl %0,(%1)\n\t"
"leal 4(%1),%1\n\t"
"decl %3\n\t"
"jnz 1b\n"
"2:\n\t"
"movw (%2),%w0\n\t"
"movw %w0,(%1)"
: "=&q" (__d0), "=r" (__tmp), "=&r" (__src), "=&r" (__d1),
"=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest)
: "1" (__tmp), "2" (__src), "3" (__srclen / 2),
"m" ( *(struct { __extension__ char __x[__srclen]; } *)__src)
: "cc");
return __tmp + 2;
}