[Bug rtl-optimization/33922] [4.3 Regression] slow compilation on ia64 (postreload scheduling)
jakub at gcc dot gnu dot org
gcc-bugzilla@gcc.gnu.org
Sun Oct 28 21:11:00 GMT 2007
------- Comment #20 from jakub at gcc dot gnu dot org 2007-10-28 21:11 -------
Actually, we probably don't need to write to the rws_sum array at all when in
safe_group_barrier_needed, and then we wouldn't need to copy it around (save and
restore it) at all.
--- config/ia64/ia64.c~ 2007-10-28 22:00:24.000000000 +0100
+++ config/ia64/ia64.c 2007-10-28 22:04:26.000000000 +0100
@@ -5353,6 +5353,7 @@ static int rtx_needs_barrier (rtx, struc
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx);
static int safe_group_barrier_needed (rtx);
+static int in_safe_group_barrier;
/* Update *RWS for REGNO, which is being written by the current instruction,
with predicate PRED, and associated register flags in FLAGS. */
@@ -5407,7 +5408,8 @@ rws_access_regno (int regno, struct reg_
{
case 0:
/* The register has not been written yet. */
- rws_update (regno, flags, pred);
+ if (!in_safe_group_barrier)
+ rws_update (regno, flags, pred);
break;
case 1:
@@ -5421,7 +5423,8 @@ rws_access_regno (int regno, struct reg_
;
else if ((rws_sum[regno].first_pred ^ 1) != pred)
need_barrier = 1;
- rws_update (regno, flags, pred);
+ if (!in_safe_group_barrier)
+ rws_update (regno, flags, pred);
break;
case 2:
@@ -5433,8 +5436,11 @@ rws_access_regno (int regno, struct reg_
;
else
need_barrier = 1;
- rws_sum[regno].written_by_and = flags.is_and;
- rws_sum[regno].written_by_or = flags.is_or;
+ if (!in_safe_group_barrier)
+ {
+ rws_sum[regno].written_by_and = flags.is_and;
+ rws_sum[regno].written_by_or = flags.is_or;
+ }
break;
default:
@@ -6099,17 +6105,16 @@ int safe_group_barrier_needed_cnt[5];
static int
safe_group_barrier_needed (rtx insn)
{
- struct reg_write_state rws_saved[NUM_REGS];
int saved_first_instruction;
int t;
- memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
saved_first_instruction = first_instruction;
+ in_safe_group_barrier = 1;
t = group_barrier_needed (insn);
- memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
first_instruction = saved_first_instruction;
+ in_safe_group_barrier = 0;
return t;
}
together with the other patches gives (everything is x86_64-linux -> ia64-linux
cross, would need to measure it on ia64-linux native)
scheduling 2 : 5.20 (78%) usr 0.01 (50%) sys 5.20 (77%) wall
1970 kB (15%) ggc
or ~ 45% speedup on this testcase.
--
jakub at gcc dot gnu dot org changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |wilson at gcc dot gnu dot
| |org
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33922
More information about the Gcc-bugs
mailing list