This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
patch to fix PR80481
- From: Vladimir Makarov <vmakarov at redhat dot com>
- To: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 12 Jan 2018 12:11:01 -0500
- Subject: patch to fix PR80481
- Authentication-results: sourceware.org; auth=none
The following patch fixes
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80481
While forming an allocation thread in a multi-region function, a
conflicting allocno was added to the thread, which resulted in the
generation of additional moves. The patch prevents the inclusion of
conflicting allocnos in allocation threads.
The patch was successfully bootstrapped and tested on x86-64. Its
effect on x86-64 SPEC2000 rates and code size is insignificant.
Committed as rev. 256590.
Index: ChangeLog
===================================================================
--- ChangeLog (revision 256589)
+++ ChangeLog (working copy)
@@ -1,3 +1,11 @@
+2018-01-12 Vladimir Makarov <vmakarov@redhat.com>
+
+ PR rtl-optimization/80481
+ * ira-color.c (get_cap_member): New function.
+ (allocnos_conflict_by_live_ranges_p): Use it.
+ (slot_coalesced_allocno_live_ranges_intersect_p): Add assert.
+ (setup_slot_coalesced_allocno_live_ranges): Ditto.
+
2018-01-12 Uros Bizjak <ubizjak@gmail.com>
PR target/83628
Index: ira-color.c
===================================================================
--- ira-color.c (revision 256350)
+++ ira-color.c (working copy)
@@ -1905,6 +1905,18 @@ assign_hard_reg (ira_allocno_t a, bool r
/* An array used to sort copies. */
static ira_copy_t *sorted_copies;
+/* If allocno A is a cap, return non-cap allocno from which A is
+ created. Otherwise, return A. */
+static ira_allocno_t
+get_cap_member (ira_allocno_t a)
+{
+ ira_allocno_t member;
+
+ while ((member = ALLOCNO_CAP_MEMBER (a)) != NULL)
+ a = member;
+ return a;
+}
+
/* Return TRUE if live ranges of allocnos A1 and A2 intersect. It is
used to find a conflict for new allocnos or allocnos with the
different allocno classes. */
@@ -1924,6 +1936,10 @@ allocnos_conflict_by_live_ranges_p (ira_
&& ORIGINAL_REGNO (reg1) == ORIGINAL_REGNO (reg2))
return false;
+ /* We don't keep live ranges for caps because they can be quite big.
+ Use ranges of non-cap allocno from which caps are created. */
+ a1 = get_cap_member (a1);
+ a2 = get_cap_member (a2);
for (i = 0; i < n1; i++)
{
ira_object_t c1 = ALLOCNO_OBJECT (a1, i);
@@ -4027,7 +4043,7 @@ slot_coalesced_allocno_live_ranges_inter
{
int i;
int nr = ALLOCNO_NUM_OBJECTS (a);
-
+ gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
for (i = 0; i < nr; i++)
{
ira_object_t obj = ALLOCNO_OBJECT (a, i);
@@ -4057,6 +4073,7 @@ setup_slot_coalesced_allocno_live_ranges
a = ALLOCNO_COALESCE_DATA (a)->next)
{
int nr = ALLOCNO_NUM_OBJECTS (a);
+ gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
for (i = 0; i < nr; i++)
{
ira_object_t obj = ALLOCNO_OBJECT (a, i);
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog (revision 256589)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,8 @@
+2018-01-12 Vladimir Makarov <vmakarov@redhat.com>
+
+ PR rtl-optimization/80481
+ * g++.dg/pr80481.C: New.
+
2018-01-12 Uros Bizjak <ubizjak@gmail.com>
PR target/83628
Index: testsuite/g++.dg/pr80481.C
===================================================================
--- testsuite/g++.dg/pr80481.C (nonexistent)
+++ testsuite/g++.dg/pr80481.C (working copy)
@@ -0,0 +1,70 @@
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" }
+// { dg-final { scan-assembler-not "vmovaps" } }
+
+#include <math.h>
+
+#include <xmmintrin.h>
+
+#define max(a, b) ( (a) > (b) ? (a) : (b) )
+
+struct Sdata {
+ float w;
+ float s;
+ float r;
+ float t;
+ float v;
+};
+ extern int N1, N2, N3;
+
+#define func(p, up, down) ((p)*(up) + (1.0f-(p)) * (down))
+
+void foo (Sdata *in, int idx, float *out)
+{
+ float* y1 = (float*)_mm_malloc(sizeof(float) * N1,16);
+ float* y2 = (float*)_mm_malloc(sizeof(float) * N1,16);
+ float* y3 = (float*)_mm_malloc(sizeof(float) * N1,16);
+ float* y4 = (float*)_mm_malloc(sizeof(float) * N1,16);
+
+ for (int k = idx; k < idx + N3; k++) {
+ float x1 = in[k].r;
+ float x2 = in[k].s;
+ float x3 = in[k].w;
+ float x4 = in[k].v;
+ float x5 = in[k].t;
+ x5 /= N2;
+ float u = exp(x4 * sqrt(x5));
+ float d = exp(-x4 * sqrt(x5));
+ float a = exp(x1 * x5);
+ float m = exp(-x1 * x5);
+ float p = (a - d) / (u - d);
+ y2[0] = x2;
+ y3[0] = float(1.f);
+ for (int i = 1; i <= N2; i++) {
+ y2[i] = u * y2[i - 1];
+ y3[i] = d * y3[i - 1];
+ }
+#pragma omp simd
+ for (int i = 0; i <= N2; i++) {
+ y1[i] =
+ max((x3 - y2[N2 - i] * y3[i]), float(0.f));
+ }
+ for (int i = N2 - 1; i >= 0; i--) {
+#pragma omp simd
+ for (int j = 0; j <= i; j++) {
+ y4[j] = func(p,y1[j],y1[j+1]) * m;
+ }
+#pragma omp simd
+ for (int j = 0; j <= i; j++) {
+ float t1 = y2[i - j] * y3[j];
+ float t2 = max(x3 - t1, float(0.f));
+ y1[j] = max(t2, y4[j]);
+ }
+ }
+ out[k] = y1[0];
+ }
+ _mm_free(y1);
+ _mm_free(y2);
+ _mm_free(y3);
+ _mm_free(y4);
+}