This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Fix reload failures on inline asm from mplayer SVN (PR rtl-optimization/39543, take 2)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Steven Bosscher <stevenb dot gcc at gmail dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 27 Mar 2009 12:25:54 +0100
- Subject: [PATCH] Fix reload failures on inline asm from mplayer SVN (PR rtl-optimization/39543, take 2)
- References: <571f6b510903260952q29560dbaha96a3ee5405c145@mail.gmail.com>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Thu, Mar 26, 2009 at 05:52:16PM +0100, Steven Bosscher wrote:
> [ xf. http://gcc.gnu.org/ml/gcc-patches/2009-03/msg01315.html ]
>
> > +static int
> > +check_reg_count_callback (rtx *px, void *data)
>
> Misses comment before the function.
Function gone.
> > +{
> > + int *regnop = (int *) data;
> > +
> > + if (!REG_P (*px))
> > + return 0;
> > +
> > + if (*regnop < 0 || *regnop == (int) REGNO (*px))
>
> Eh, when can you have *regnop < 0?
As Paolo already said, this was being initialized to -1.
> > +/* Try to replace USE with SRC (defined in DEF_INSN) in __asm. */
> > +
> > +static bool
> > +forward_propagate_asm (df_ref use, rtx def_set, rtx reg)
>
> Where is DEF_INSN? You mean DEF_SET in the comment, I think.
I'm now passing also def_insn.
> > + /* In __asm don't replace if src might need more registers than
> > + reg, as that could increase register pressure on the __asm. */
> > + regno = -1;
> > + if (for_each_rtx (&src, check_reg_count_callback, &regno) > 0)
> > + return false;
>
> Can you use DF_INSN_USES of DEF_SET instead here? All for_each_rtx and
> note_stores should die in places where the DF cache can use :-)
Yep, that works too. Updated patch, bootstrapped/regtested on x86_64-linux
and i686-linux. Ok for trunk?
2009-03-27 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/39543
* fwprop.c (forward_propagate_asm): New function.
(forward_propagate_and_simplify): Propagate also into __asm, if it
doesn't increase the number of referenced registers.
* gcc.target/i386/pr39543-1.c: New test.
* gcc.target/i386/pr39543-2.c: New test.
* gcc.target/i386/pr39543-3.c: New test.
--- gcc/fwprop.c.jj 2009-03-27 07:55:33.000000000 +0100
+++ gcc/fwprop.c 2009-03-27 10:00:48.000000000 +0100
@@ -1,5 +1,5 @@
/* RTL-based forward propagation pass for GNU compiler.
- Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
Contributed by Paolo Bonzini and Steven Bosscher.
This file is part of GCC.
@@ -852,6 +852,73 @@ forward_propagate_subreg (df_ref use, rt
return false;
}
+/* Try to replace USE with SRC (defined in DEF_INSN) in __asm. */
+
+static bool
+forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
+{
+ rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
+ int speed_p, i;
+ df_ref *use_vec;
+
+ gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
+
+ src = SET_SRC (def_set);
+ use_pat = PATTERN (use_insn);
+
+ /* In __asm don't replace if src might need more registers than
+ reg, as that could increase register pressure on the __asm. */
+ use_vec = DF_INSN_USES (def_insn);
+ if (use_vec[0] && use_vec[1])
+ return false;
+
+ speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
+ asm_operands = NULL_RTX;
+ switch (GET_CODE (use_pat))
+ {
+ case ASM_OPERANDS:
+ asm_operands = use_pat;
+ break;
+ case SET:
+ loc = &SET_DEST (use_pat);
+ new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
+ if (new_rtx)
+ validate_unshare_change (use_insn, loc, new_rtx, true);
+ asm_operands = SET_SRC (use_pat);
+ break;
+ case PARALLEL:
+ for (i = 0; i < XVECLEN (use_pat, 0); i++)
+ if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
+ {
+ loc = &SET_DEST (XVECEXP (use_pat, 0, i));
+ new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
+ if (new_rtx)
+ validate_unshare_change (use_insn, loc, new_rtx, true);
+ asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
+ }
+ else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
+ asm_operands = XVECEXP (use_pat, 0, i);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
+ for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
+ {
+ loc = &ASM_OPERANDS_INPUT (asm_operands, i);
+ new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
+ if (new_rtx)
+ validate_unshare_change (use_insn, loc, new_rtx, true);
+ }
+
+ if (num_changes_pending () == 0 || !apply_change_group ())
+ return false;
+
+ num_changes++;
+ return true;
+}
+
/* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
result. */
@@ -863,12 +930,16 @@ forward_propagate_and_simplify (df_ref u
rtx src, reg, new_rtx, *loc;
bool set_reg_equal;
enum machine_mode mode;
+ int asm_use = -1;
+
+ if (INSN_CODE (use_insn) < 0)
+ asm_use = asm_noperands (PATTERN (use_insn));
- if (!use_set)
+ if (!use_set && asm_use < 0)
return false;
/* Do not propagate into PC, CC0, etc. */
- if (GET_MODE (SET_DEST (use_set)) == VOIDmode)
+ if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
return false;
/* If def and use are subreg, check if they match. */
@@ -900,7 +971,7 @@ forward_propagate_and_simplify (df_ref u
if (MEM_P (src) && MEM_READONLY_P (src))
{
rtx x = avoid_constant_pool_reference (src);
- if (x != src)
+ if (x != src && use_set)
{
rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
@@ -911,6 +982,9 @@ forward_propagate_and_simplify (df_ref u
return false;
}
+ if (asm_use >= 0)
+ return forward_propagate_asm (use, def_insn, def_set, reg);
+
/* Else try simplifying. */
if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
--- gcc/testsuite/gcc.target/i386/pr39543-1.c.jj 2009-03-25 16:40:18.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr39543-1.c 2009-03-25 16:40:50.000000000 +0100
@@ -0,0 +1,52 @@
+/* PR rtl-optimization/39543 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fomit-frame-pointer" } */
+
+float __attribute__ ((aligned (16))) s0[128];
+const float s1 = 0.707;
+float s2[8] __attribute__ ((aligned (16)));
+float s3[8] __attribute__ ((aligned (16)));
+float s4[16] __attribute__ ((aligned (16)));
+float s5[16] __attribute__ ((aligned (16)));
+
+void
+foo (int k, float *x, float *y, const float *d, const float *z)
+{
+ float *a, *b, *c, *e;
+
+ a = x + 2 * k;
+ b = a + 2 * k;
+ c = b + 2 * k;
+ e = y + 2 * k;
+ __asm__ volatile (""
+ : "=m" (x[0]), "=m" (b[0]), "=m" (a[0]), "=m" (c[0])
+ : "m" (y[0]), "m" (y[k * 2]), "m" (x[0]), "m" (a[0])
+ : "memory");
+ for (;;)
+ {
+ __asm__ volatile (""
+ :
+ : "m" (y[2]), "m" (d[2]), "m" (e[2]), "m" (z[2])
+ : "memory");
+ if (!--k)
+ break;
+ }
+ __asm__ volatile (""
+ : "=m" (x[2]), "=m" (x[10]), "=m" (x[6]), "=m" (x[14])
+ : "m" (y[2]), "m" (y[6]), "m" (x[2]), "m" (x[6]),
+ "m" (y[18]), "m" (s1)
+ : "memory");
+}
+
+void
+bar (float *a)
+{
+ foo (4, a, a + 16, s2, s3);
+ foo (8, a, a + 32, s4, s5);
+}
+
+void
+baz (void)
+{
+ bar (s0);
+}
--- gcc/testsuite/gcc.target/i386/pr39543-2.c.jj 2009-03-25 16:40:18.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr39543-2.c 2009-03-25 16:40:38.000000000 +0100
@@ -0,0 +1,51 @@
+/* PR rtl-optimization/39543 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+float __attribute__ ((aligned (16))) s0[128];
+const float s1 = 0.707;
+float s2[8] __attribute__ ((aligned (16)));
+float s3[8] __attribute__ ((aligned (16)));
+float s4[16] __attribute__ ((aligned (16)));
+float s5[16] __attribute__ ((aligned (16)));
+
+void
+foo (int k, float *x, float *y, const float *d, const float *z)
+{
+ float *a, *b, *c, *e;
+
+ a = x + 2 * k;
+ b = a + 2 * k;
+ c = b + 2 * k;
+ e = y + 2 * k;
+ __asm__ volatile (""
+ : "=m" (x[0]), "=m" (b[0]), "=m" (a[0]), "=m" (c[0])
+ : "m" (y[0]), "m" (y[k * 2]), "m" (x[0]), "m" (a[0])
+ : "memory");
+ for (;;)
+ {
+ __asm__ volatile (""
+ :
+ : "m" (y[2]), "m" (d[2]), "m" (e[2]), "m" (z[2])
+ : "memory");
+ if (!--k)
+ break;
+ }
+ __asm__ volatile (""
+ : "=m" (x[2]), "=m" (x[10]), "=m" (x[6]), "=m" (x[14])
+ : "m" (y[2]), "m" (y[6]), "m" (x[2]), "m" (x[6]), "m" (s1)
+ : "memory");
+}
+
+void
+bar (float *a)
+{
+ foo (4, a, a + 16, s2, s3);
+ foo (8, a, a + 32, s4, s5);
+}
+
+void
+baz (void)
+{
+ bar (s0);
+}
--- gcc/testsuite/gcc.target/i386/pr39543-3.c.jj 2009-03-25 16:41:29.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr39543-3.c 2009-03-25 16:41:19.000000000 +0100
@@ -0,0 +1,42 @@
+/* PR rtl-optimization/39543 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int s[128];
+
+void
+f1 (void)
+{
+ int i;
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+ : "=r" (i)
+ : "m" (s[0]), "m" (s[2]), "m" (s[4]), "m" (s[6]), "m" (s[8]),
+ "m" (s[10]), "m" (s[12]), "m" (s[14]), "m" (s[16]), "m" (s[18]),
+ "m" (s[20]), "m" (s[22]), "m" (s[24]), "m" (s[26]), "m" (s[28]),
+ "m" (s[30]), "m" (s[32]));
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+ : "=r" (i)
+ : "m" (s[0]), "m" (s[2]), "m" (s[4]), "m" (s[6]), "m" (s[8]),
+ "m" (s[10]), "m" (s[12]), "m" (s[14]), "m" (s[16]), "m" (s[18]),
+ "m" (s[20]), "m" (s[22]), "m" (s[24]), "m" (s[26]), "m" (s[28]),
+ "m" (s[30]), "m" (s[32]));
+}
+
+void
+f2 (int *q)
+{
+ int i;
+ int *p = q + 32;
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+ : "=r" (i)
+ : "m" (p[0]), "m" (p[2]), "m" (p[4]), "m" (p[6]), "m" (p[8]),
+ "m" (p[10]), "m" (p[12]), "m" (p[14]), "m" (p[16]), "m" (p[18]),
+ "m" (p[20]), "m" (p[22]), "m" (p[24]), "m" (p[26]), "m" (p[28]),
+ "m" (p[30]), "m" (p[32]));
+ asm volatile ("# %0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17"
+ : "=r" (i)
+ : "m" (p[0]), "m" (p[2]), "m" (p[4]), "m" (p[6]), "m" (p[8]),
+ "m" (p[10]), "m" (p[12]), "m" (p[14]), "m" (p[16]), "m" (p[18]),
+ "m" (p[20]), "m" (p[22]), "m" (p[24]), "m" (p[26]), "m" (p[28]),
+ "m" (p[30]), "m" (p[32]));
+}
Jakub