Bug 27755 - PRE confused by control flow
Summary: PRE confused by control flow
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: tree-optimization (show other bugs)
Version: 4.2.0
: P3 enhancement
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks: 22501
  Show dependency treegraph
 
Reported: 2006-05-24 12:59 UTC by Richard Biener
Modified: 2006-11-15 03:52 UTC (History)
2 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2006-09-12 06:25:23


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Richard Biener 2006-05-24 12:59:54 UTC
In the following testcase load-PRE is not able to hoist the load of *x
out of the loop because of the empty loop.

int foo(int k, int *x)
{
  int j=0;
  int res;
  do {
    for (int n=0;n<3;++n);
    res = *x;
  } while (++j<k);
  return res;
}
Comment 1 Richard Biener 2006-05-24 13:36:59 UTC
Not only loads:

int foo(int k, int b)
{
  int j=0;
  int res;
  do {
    for (int n=0;n<3;++n);
    res = b+1;
  } while (++j<k);
  return res;
}

b+1 is not moved out of the outer loop.  This looks like it would affect
loop invariant motion in general for nested loops in case the invariants
are computed after the inner loops.  Is this how it is supposed to be?
Comment 2 Richard Biener 2006-05-24 14:35:07 UTC
The CFG looks like

int foo(int, int) (k, b)
{
  int pretmp.21;
  int n;
  int res;
  int j;

  # BLOCK 2 freq:367
  # PRED: ENTRY [100.0%]  (fallthru,exec)
  # SUCC: 3 [100.0%]  (fallthru,exec)

  # BLOCK 3 freq:3333
  # PRED: 2 [100.0%]  (fallthru,exec) 8 [100.0%]  (fallthru)
  # j_1 = PHI <0(2), j_7(8)>;
<L0>:;
  # SUCC: 4 [100.0%]  (fallthru,exec)

  # BLOCK 4 freq:10000
  # PRED: 7 [100.0%]  (fallthru) 3 [100.0%]  (fallthru,exec)
  # n_13 = PHI <n_12(7), 0(3)>;
<L1>:;
  n_12 = n_13 + 1;
  if (n_12 <= 2) goto <L10>; else goto <L3>;
  # SUCC: 7 [66.7%]  (dfs_back,true,exec) 5 [33.3%]  (loop_exit,false,exec)

  # BLOCK 7 freq:6667
  # PRED: 4 [66.7%]  (dfs_back,true,exec)
<L10>:;
  goto <bb 4> (<L1>);
  # SUCC: 4 [100.0%]  (fallthru)

  # BLOCK 5 freq:3333
  # PRED: 4 [33.3%]  (loop_exit,false,exec)
<L3>:;
  res_6 = b_5 + 1;
  j_7 = j_1 + 1;
  if (j_7 < k_8) goto <L11>; else goto <L4>;
  # SUCC: 8 [89.0%]  (dfs_back,true,exec) 6 [11.0%]  (loop_exit,false,exec)

  # BLOCK 8 freq:2966
  # PRED: 5 [89.0%]  (dfs_back,true,exec)
<L11>:;
  goto <bb 3> (<L0>);
  # SUCC: 3 [100.0%]  (fallthru)

  # BLOCK 6 freq:367
  # PRED: 5 [11.0%]  (loop_exit,false,exec)
<L4>:;
  return res_6;
  # SUCC: EXIT [100.0%]

}

where block #7 is the problematic one because ANTIC_IN of it never get's
the value handle for b+1.
Comment 3 Richard Biener 2006-05-24 15:56:30 UTC
This seems to fix it:

Index: tree-ssa-pre.c
===================================================================
--- tree-ssa-pre.c      (revision 114040)
+++ tree-ssa-pre.c      (working copy)
@@ -1696,6 +1713,7 @@ compute_antic_aux (basic_block block, bo
 
       worklist = VEC_alloc (basic_block, heap, EDGE_COUNT (block->succs));
       FOR_EACH_EDGE (e, ei, block->succs)
+        if (!(e->flags & EDGE_DFS_BACK))
        VEC_quick_push (basic_block, worklist, e->dest);
       first = VEC_index (basic_block, worklist, 0);
       set_copy (ANTIC_OUT, ANTIC_IN (first));
Comment 4 Richard Biener 2006-05-24 17:15:18 UTC
Unfortunately it ICEs during bootstrap compiling mf-runtime.c with

mf-runtime.8.i: In function '__mfu_check':
mf-runtime.8.i:8: internal compiler error: in find_or_generate_expression, at tree-ssa-pre.c:2279

testcase:

typedef unsigned long int uintptr_t;
unsigned mudflap_mode;
typedef struct __mf_object {
  uintptr_t low, high;
} __mf_object_t;
void __mfu_check (void *ptr, unsigned obj_count,
        uintptr_t ptr_lower, uintptr_t ptr_higher, __mf_object_t** all_ovr_obj)
{
  int judgement = 0;
  uintptr_t ptr_low = (uintptr_t) ptr;
  uintptr_t ptr_high = (uintptr_t) ptr;
  switch (mudflap_mode) {
  case 0:    
    while (judgement == 0)     
      {
        unsigned i;
        unsigned uncovered = 0;
        for (i = 0; i < obj_count; i++)
          {
             __mf_object_t *obj = all_ovr_obj[i];
             int j, uncovered_low_p, uncovered_high_p;
             uncovered_low_p = ptr_low < obj->low;
             uncovered_high_p = ptr_high > obj->high;
             for (j = 0; j < obj_count; j++)
               { 
                 __mf_object_t *obj2 = all_ovr_obj[j];
                 if (ptr_lower >= obj2->low && ptr_lower <= obj2->high)
                   uncovered_low_p = 0;
                 if (ptr_high >= obj2->low && ptr_higher <= obj2->high)
                   uncovered_high_p = 0;
               }
             if (uncovered_low_p || uncovered_high_p)    
               uncovered++;
          }
        if (uncovered == 0)     
          judgement = 1;
      }
  }
}
Comment 5 Richard Biener 2006-05-24 17:30:19 UTC
Cannot get it smaller than

unsigned mudflap_mode;
void __mfu_check (unsigned obj_count,
        unsigned long ptr_lower, unsigned long* all_ovr_obj)
{
  unsigned uncovered = 1, i;
  switch (mudflap_mode) {
  case 0:
    while (uncovered)
      {
        uncovered = 0;
        for (i = 0; i < obj_count; i++)
          {
             int j, uncovered_low_p = 1, uncovered_high_p = 1;
             for (j = 0; j < obj_count; j++)
               { 
                 if (ptr_lower <= *all_ovr_obj)
                   {
                     uncovered_low_p = 0;
                     uncovered_high_p = 0;
                   }
               }
             if (uncovered_low_p || uncovered_high_p)    
               uncovered++;
          }
      }
  }
}
Comment 6 Andrew Pinski 2006-09-12 06:25:23 UTC
Confirmed, LIM works correctly though.
Comment 7 Daniel Berlin 2006-09-12 13:31:41 UTC
Subject: Re:  PRE confused by control flow

This will be fixed by the 4.3 changes.
Comment 8 Daniel Berlin 2006-11-14 18:12:35 UTC
Subject: Bug 27755

Author: dberlin
Date: Tue Nov 14 18:12:20 2006
New Revision: 118821

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=118821
Log:
2006-11-14  Daniel Berlin  <dberlin@dberlin.org>

	Fix PR tree-optimization/27755

	* tree-ssa-pre.c: Update comments.
	(bb_bitmap_sets): Add pa_in and  deferred member.
	(BB_DEFERRED): New macro.
	(maximal_set): New variable.
	(pre_stats): Add pa_insert member.
	(bitmap_set_and): Short circuit orig == dest.
	(bitmap_set_subtract_values): New function.
	(bitmap_set_contains_expr): Ditto.
	(translate_vuses_through_block): Add phiblock argument.
	(dependent_clean): New function.
	(compute_antic_aux): Update for maximal_set changes.
	(compute_partial_antic_aux): New function.
	(compute_antic): Handle partial anticipation.
	(do_partial_partial_insertion): New function.
	(insert_aux): Handle partial anticipation.
	(add_to_sets): Add to maximal set.
	(compute_avail): Ditto.
	(init_pre): Initialize maximal_set.
	(execute_pre): Do partial anticipation if -O3+.

Added:
    trunk/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-16.c
Modified:
    trunk/   (props changed)
    trunk/gcc/ChangeLog
    trunk/gcc/tree-ssa-pre.c

Propchange: trunk/
            ('svk:merge' modified)


Comment 9 Daniel Berlin 2006-11-15 03:52:23 UTC
Fixed