[Bug rtl-optimization/33922] [4.3 Regression] slow compilation on ia64 (postreload scheduling)

jakub at gcc dot gnu dot org gcc-bugzilla@gcc.gnu.org
Sun Oct 28 21:11:00 GMT 2007



------- Comment #20 from jakub at gcc dot gnu dot org  2007-10-28 21:11 -------
Actually, we probably don't need to write to the rws_sum array at all when in
safe_group_barrier_needed, and then we wouldn't need to copy it around (save and
restore it) at all.

--- config/ia64/ia64.c~ 2007-10-28 22:00:24.000000000 +0100
+++ config/ia64/ia64.c  2007-10-28 22:04:26.000000000 +0100
@@ -5353,6 +5353,7 @@ static int rtx_needs_barrier (rtx, struc
 static void init_insn_group_barriers (void);
 static int group_barrier_needed (rtx);
 static int safe_group_barrier_needed (rtx);
+static int in_safe_group_barrier;

 /* Update *RWS for REGNO, which is being written by the current instruction,
    with predicate PRED, and associated register flags in FLAGS.  */
@@ -5407,7 +5408,8 @@ rws_access_regno (int regno, struct reg_
        {
        case 0:
          /* The register has not been written yet.  */
-         rws_update (regno, flags, pred);
+         if (!in_safe_group_barrier)
+           rws_update (regno, flags, pred);
          break;

        case 1:
@@ -5421,7 +5423,8 @@ rws_access_regno (int regno, struct reg_
            ;
          else if ((rws_sum[regno].first_pred ^ 1) != pred)
            need_barrier = 1;
-         rws_update (regno, flags, pred);
+         if (!in_safe_group_barrier)
+           rws_update (regno, flags, pred);
          break;

        case 2:
@@ -5433,8 +5436,11 @@ rws_access_regno (int regno, struct reg_
            ;
          else
            need_barrier = 1;
-         rws_sum[regno].written_by_and = flags.is_and;
-         rws_sum[regno].written_by_or = flags.is_or;
+          if (!in_safe_group_barrier)
+           {
+             rws_sum[regno].written_by_and = flags.is_and;
+             rws_sum[regno].written_by_or = flags.is_or;
+           }
          break;

        default:
@@ -6099,17 +6105,16 @@ int safe_group_barrier_needed_cnt[5];
 static int
 safe_group_barrier_needed (rtx insn)
 {
-  struct reg_write_state rws_saved[NUM_REGS];
   int saved_first_instruction;
   int t;

-  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
   saved_first_instruction = first_instruction;
+  in_safe_group_barrier = 1;

   t = group_barrier_needed (insn);

-  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
   first_instruction = saved_first_instruction;
+  in_safe_group_barrier = 0;

   return t;
 }

together with the other patches gives (everything is x86_64-linux -> ia64-linux
cross, would need to measure it on ia64-linux native) 
 scheduling 2          :   5.20 (78%) usr   0.01 (50%) sys   5.20 (77%) wall   
1970 kB (15%) ggc

or ~ 45% speedup on this testcase.


-- 

jakub at gcc dot gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |wilson at gcc dot gnu dot
                   |                            |org


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33922



More information about the Gcc-bugs mailing list