This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

patch to fix PR80481


  The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80481

   While forming an allocation thread in a multi-region function, a conflict allocno was added to the thread, which resulted in the generation of additional moves.  The patch prevents conflict allocnos from being included in allocation threads.

  The patch was successfully bootstrapped and tested on x86-64.  Its effect on x86-64 SPEC2000 rates and code size is insignificant.

  Committed as rev. 256590.

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 256589)
+++ ChangeLog	(working copy)
@@ -1,3 +1,11 @@
+2018-01-12  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/80481
+	* ira-color.c (get_cap_member): New function.
+	(allocnos_conflict_by_live_ranges_p): Use it.
+	(slot_coalesced_allocno_live_ranges_intersect_p): Add assert.
+	(setup_slot_coalesced_allocno_live_ranges): Ditto.
+
 2018-01-12  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/83628
Index: ira-color.c
===================================================================
--- ira-color.c	(revision 256350)
+++ ira-color.c	(working copy)
@@ -1905,6 +1905,18 @@ assign_hard_reg (ira_allocno_t a, bool r
 /* An array used to sort copies.  */
 static ira_copy_t *sorted_copies;
 
+/* If allocno A is a cap, follow its ALLOCNO_CAP_MEMBER chain and return
+   the non-cap allocno it ends at.  Otherwise, return A itself.  */
+static ira_allocno_t
+get_cap_member (ira_allocno_t a)
+{
+  ira_allocno_t member;
+  /* Step through cap members until one has no cap member of its own.  */
+  while ((member = ALLOCNO_CAP_MEMBER (a)) != NULL)
+    a = member;
+  return a;
+}
+
 /* Return TRUE if live ranges of allocnos A1 and A2 intersect.  It is
    used to find a conflict for new allocnos or allocnos with the
    different allocno classes.  */
@@ -1924,6 +1936,10 @@ allocnos_conflict_by_live_ranges_p (ira_
       && ORIGINAL_REGNO (reg1) == ORIGINAL_REGNO (reg2))
     return false;
 
+  /* We don't keep live ranges for caps because they can be quite big.
+     Use ranges of non-cap allocno from which caps are created.  */
+  a1 = get_cap_member (a1);
+  a2 = get_cap_member (a2);
   for (i = 0; i < n1; i++)
     {
       ira_object_t c1 = ALLOCNO_OBJECT (a1, i);
@@ -4027,7 +4043,7 @@ slot_coalesced_allocno_live_ranges_inter
     {
       int i;
       int nr = ALLOCNO_NUM_OBJECTS (a);
-
+      gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
       for (i = 0; i < nr; i++)
 	{
 	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
@@ -4057,6 +4073,7 @@ setup_slot_coalesced_allocno_live_ranges
        a = ALLOCNO_COALESCE_DATA (a)->next)
     {
       int nr = ALLOCNO_NUM_OBJECTS (a);
+      gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
       for (i = 0; i < nr; i++)
 	{
 	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 256589)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@
+2018-01-12  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/80481
+	* g++.dg/pr80481.C: New.
+
 2018-01-12  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/83628
Index: testsuite/g++.dg/pr80481.C
===================================================================
--- testsuite/g++.dg/pr80481.C	(nonexistent)
+++ testsuite/g++.dg/pr80481.C	(working copy)
@@ -0,0 +1,70 @@
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" }
+// { dg-final { scan-assembler-not "vmovaps" } }
+
+#include <math.h>
+
+#include <xmmintrin.h>
+
+#define max(a, b)   ( (a) > (b) ? (a) : (b) )
+
+struct Sdata {  // one input record; foo() reads all five fields
+  float w; 
+  float s;
+  float r;
+  float t;
+  float v;
+};
+ extern int N1, N2, N3;
+
+#define func(p, up, down) ((p)*(up) + (1.0f-(p)) * (down))
+ 
+void foo (Sdata *in, int idx, float *out)  // compile-only test body; the asm is scanned for vmovaps
+{
+  float* y1 = (float*)_mm_malloc(sizeof(float) * N1,16);  // four scratch arrays of N1 floats each
+  float* y2  = (float*)_mm_malloc(sizeof(float) * N1,16);
+  float* y3  = (float*)_mm_malloc(sizeof(float) * N1,16);
+  float* y4  = (float*)_mm_malloc(sizeof(float) * N1,16); 
+
+  for (int k = idx; k < idx + N3; k++) {  // one out[k] per record in[k], k in [idx, idx + N3)
+    float x1 = in[k].r;
+    float x2    = in[k].s;
+    float x3      = in[k].w;
+    float x4     = in[k].v;
+    float x5         = in[k].t;
+    x5 /= N2;
+    float u = exp(x4 * sqrt(x5));
+    float d = exp(-x4 * sqrt(x5));
+    float a = exp(x1 * x5);
+    float m = exp(-x1 * x5);
+    float p = (a - d) / (u - d);
+    y2[0] = x2;
+    y3[0] = float(1.f);
+    for (int i = 1; i <= N2; i++) {  // running products: y2[i] = u * y2[i-1], y3[i] = d * y3[i-1]
+      y2[i] = u * y2[i - 1];
+      y3[i] = d * y3[i - 1];
+    }
+#pragma omp simd
+    for (int i = 0; i <= N2; i++) {  // initial y1 values, clamped below at 0
+      y1[i] =
+        max((x3 - y2[N2 - i] * y3[i]), float(0.f));
+    }
+    for (int i = N2 - 1; i >= 0; i--) {  // each pass updates y1[0..i]; the range shrinks by one per pass
+#pragma omp simd
+      for (int j = 0; j <= i; j++) {
+        y4[j] = func(p,y1[j],y1[j+1]) * m;  // blend of neighbouring y1 entries weighted by p, scaled by m
+      }
+#pragma omp simd
+      for (int j = 0; j <= i; j++) {
+        float t1 = y2[i - j] * y3[j];
+        float t2 = max(x3 - t1, float(0.f));
+        y1[j] = max(t2, y4[j]);  // keep the larger of the clamped difference and the blended value
+      }
+    }
+    out[k] = y1[0];  // result for record k
+  }
+  _mm_free(y1);
+  _mm_free(y2);
+  _mm_free(y3);
+  _mm_free(y4);
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]