[PATCH] Unrolling addressing optimization

Revital Eres ERES@il.ibm.com
Wed Apr 7 09:01:00 GMT 2004


Hello, 

During the unrolling we do the following transformation
for array accesses (loads and stores):

After the unrolling for an access to an array ("a[i]")
we have the following pattern:

    i = i + 1
    load a[i]
    ....
    i = i + 1
    load a[i]
    ....
    i = i + 1
    load a[i]
    ....
    i = i + 1
    load a[i]

Our optimization computes the address of a[i] (addr = &a[i])
and we have the following code:

   load addr
   ....
   load 4(addr)
   ....
   load 8(addr)
   ....
   load 12(addr) 
   ... 

This transformation eliminates data dependencies 
(scheduling, vectorization and other optimizations 
may take advantage of this).
In most of the cases, it also reduces the number
of instructions in the loop.

For CINTSPEC2000 on POWER4 with the option -funroll-loops 
we see about a 1.7% improvement on the 3.4 branch, and about 0.6%
on mainline (we are trying to find the reasons for
this performance regression on mainline). 
No significant changes for CFSPEC2000 package.

Bootstrapped & regression tests on POWER4.

Revital Eres




2004-04-07  Revital Eres  <eres@il.ibm.com>

        * cfglayout.c:
        (split_insn_iv, update_splitted_insn, handle_ivs, 
is_valid_expression, 
         can_split_insn): New functions.
        (copy_bbs): Support for the new optimization.
        Include expr.h
        * cfglayout.h (struct loop_reg_info, struct loop_insn_info): New 
structures.
          (copy_bbs): New declaration.
       * cfgloop.h (is_biv): Declare.
        * cfgloopmanip.c: (count_insn_in_bb, count_insn_in_bbs, 
mark_modified_reg,
         analyze_loop_invariants, find_shift_pattern_in_loop,
         find_shift_giv): New functions.
         (duplicate_loop_to_header_edge): Support for the new 
optimization.
        * loop-iv.c (is_biv): New function.




Index: cfglayout.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/cfglayout.c,v
retrieving revision 1.55
diff -c -3 -p -r1.55 cfglayout.c
*** cfglayout.c 20 Mar 2004 04:52:52 -0000      1.55
--- cfglayout.c 7 Apr 2004 06:23:41 -0000
*************** Software Foundation, 59 Temple Place - S
*** 35,41 ****
  #include "target.h"
  #include "ggc.h"
  #include "alloc-pool.h"
! 
  /* The contents of the current function definition are allocated
     in this obstack, and all are freed at the end of the function.  */
  extern struct obstack flow_obstack;
--- 35,41 ----
  #include "target.h"
  #include "ggc.h"
  #include "alloc-pool.h"
! #include "expr.h"
  /* The contents of the current function definition are allocated
     in this obstack, and all are freed at the end of the function.  */
  extern struct obstack flow_obstack;
*************** void verify_insn_chain (void);
*** 57,62 ****
--- 57,73 ----
  static void fixup_fallthru_exit_predecessor (void);
  static rtx duplicate_insn_chain (rtx, rtx);
  static tree insn_scope (rtx);
+ 
+ static rtx split_insn_iv (struct loop_insn_info *, int, rtx *);
+ static rtx update_splitted_insn (struct loop_insn_info *, int, int, rtx 
*);
+ static basic_block handle_ivs (struct loop_reg_info * , 
+                                struct loop_insn_info *, int ,unsigned, 
+                                basic_block *);
+ static bool is_valid_expression (struct loop_insn_info *, 
+                                  struct loop_reg_info *, rtx, int, rtx);
+ static bool can_split_insn (struct loop_insn_info *, struct 
loop_reg_info *, 
+                             rtx *, int);
+ 
   
  rtx
  unlink_insn_chain (rtx first, rtx last)
*************** end:
*** 1254,1260 ****
  void
  copy_bbs (basic_block *bbs, unsigned n, basic_block *new_bbs,
          edge *edges, unsigned n_edges, edge *new_edges,
!         struct loop *base)
  {
    unsigned i, j;
    basic_block bb, new_bb, dom_bb;
--- 1265,1273 ----
  void
  copy_bbs (basic_block *bbs, unsigned n, basic_block *new_bbs,
                edge *edges, unsigned n_edges, edge *new_edges,
!               struct loop *base,
!               struct loop_reg_info* iv_regs, struct loop_insn_info* 
loop_insns_info,
!                       unsigned iter, unsigned ndup)
  {
    unsigned i, j;
    basic_block bb, new_bb, dom_bb;
*************** copy_bbs (basic_block *bbs, unsigned n, 
*** 1266,1271 ****
--- 1279,1290 ----
        /* Duplicate.  */
        bb = bbs[i];
        new_bb = new_bbs[i] = cfg_layout_duplicate_bb (bb, NULL);
+ 
+       /* Eliminate the use of induction variables in address access.  */ 
 
+       if (just_once_each_iteration_p (base, bbs[i]) && ndup != 1)
+       {
+         new_bb = handle_ivs (iv_regs, loop_insns_info, i, iter+1, 
&new_bbs[i]);
+       }
        bb->rbi->duplicated = 1;
        /* Add to loop.  */
        add_bb_to_loop (new_bb, bb->loop_father->copy);
*************** copy_bbs (basic_block *bbs, unsigned n, 
*** 1314,1319 ****
--- 1333,1784 ----
    /* Clear information about duplicates.  */
    for (i = 0; i < n; i++)
      bbs[i]->rbi->duplicated = 0;
+ }
+ 
+ 
+ 
+ /* After the unrolling for an access to an array ("a[i]") 
+    we have the following pattern:
+     i = i + 1
+     load a[i]
+     ....
+     i = i + 1
+     load a[i]
+     ....
+     i = i + 1
+     load a[i]
+     ....
+     i = i + 1
+     load a[i]
+   the addressing optimization computes the address of a[i] 
+   (addr = &a[i]) so that the following code
+   is received:
+    load addr
+    ....
+    load 4(addr)
+    ....
+    load 8(addr)
+    ....
+    load 12(addr) 
+    ... 
+    This transformation makes it possible to increment only the address
+    in each iteration of the unrolled loop without the dependency
+    on the induction variable.  */
+ static basic_block
+ handle_ivs (struct loop_reg_info *loop_regs_info,
+           struct loop_insn_info *loop_insns_info,
+           int bb_num, unsigned iter, basic_block *bb)
+ {
+   rtx* insn;
+   rtx next_insn;
+   bool cond;
+   static int insn_num;
+ 
+   /* In the first new copy of the unrolled loop locate all 
+      the splittable insns and mark them after applying the
+      split. */
+   if (iter == 1)
+     {
+       if (bb_num == 0)
+         {
+         insn_num = 0;
+         }
+ 
+       for (insn = &((*bb)->head_) ; insn != &NEXT_INSN ((*bb)->end_); 
+          insn = &(NEXT_INSN (*insn)))
+         {
+         if (can_split_insn (loop_insns_info, loop_regs_info, 
insn,insn_num))
+           {
+ 
+             loop_insns_info[insn_num].is_splittable_insn = true;
+             next_insn = split_insn_iv (loop_insns_info, insn_num, insn);
+             insn_num = insn_num+1;
+             insn = &(NEXT_INSN (next_insn));
+             continue;
+           }
+         insn_num = insn_num+1;
+         }
+       return (*bb);
+     }
+ 
+   /* In the remaining copies of the unrolled loop update
+      the splittable insns to use only the base addr with 
+      a proper offset. */
+ 
+   if (bb_num == 0)
+     {
+       insn_num = 0;
+     }
+ 
+   for (insn = &((*bb)->head_) ; insn != &NEXT_INSN ((*bb)->end_); 
+        insn = &(NEXT_INSN (*insn)))
+     {
+       cond =  loop_insns_info[insn_num].is_splittable_insn;
+ 
+       if (cond)
+       {
+         next_insn = update_splitted_insn (loop_insns_info, iter-1,
+                                           insn_num, insn);
+         insn_num = insn_num+1;
+         continue;
+       }
+       insn_num ++;
+     }
+   return (*bb);
+ }
+ 
+ 
+ /* INSN is of the form:
+    reg = Mem [ addr + iv ] (or store) 
+    and we are now transforming it to be of the form :
+    reg = Mem [new_addr + offset]
+    where new_addr is the base address (addr + iv)
+    and offset is the proper increment depending
+    on the current iteration and the stride of iv. */
+ 
+ static rtx
+ update_splitted_insn (struct loop_insn_info *loop_insns_info, int iter, 
+                     int insn_num, rtx *insn)
+ {
+   int factor;
+   enum machine_mode mem_mode1, ext_mode;
+   rtx addr, plus_insn, set, seq= NULL_RTX;
+   rtx mem_insn, mult_rtx, zero_insn;
+   rtx stride = loop_insns_info[insn_num].stride;
+ 
+   addr = loop_insns_info[insn_num].new_addr;
+   if (loop_insns_info[insn_num].splitt_side == load)
+     {
+       start_sequence ();
+       set = single_set (*insn);
+       if (!set)
+         abort();
+       factor = loop_insns_info[insn_num].incr_address;
+       mult_rtx = simplify_gen_binary (MULT, GET_MODE (addr),
+                                     GEN_INT (iter << factor), stride);
+       plus_insn = simplify_gen_binary (PLUS, GET_MODE (addr),
+                                      addr,  mult_rtx);
+       mem_mode1 = loop_insns_info[insn_num].mem_mode;
+       mem_insn = gen_rtx_MEM (mem_mode1, plus_insn);
+       if (loop_insns_info[insn_num].extend != NIL)
+       {
+         ext_mode = loop_insns_info[insn_num].extend_mode;
+         zero_insn = gen_rtx_ZERO_EXTEND (ext_mode, mem_insn);
+         emit_move_insn (SET_DEST (set), zero_insn);
+       }
+       else
+       {
+         emit_move_insn (SET_DEST (set), mem_insn);
+       }
+       seq = get_insns ();
+       end_sequence ();
+       emit_insn_after (seq, *insn);
+       return delete_insn (*insn);
+     }
+   if (loop_insns_info[insn_num].splitt_side == store)
+     {
+       start_sequence ();
+       set = single_set (*insn);
+        if (!set)
+          abort();
+       factor = loop_insns_info[insn_num].incr_address;
+       mult_rtx = simplify_gen_binary (MULT,GET_MODE(addr), 
+                                     GEN_INT(iter << factor),stride);
+       plus_insn = simplify_gen_binary (PLUS, GET_MODE(addr),
+                                      addr,  mult_rtx);
+       mem_mode1 = loop_insns_info[insn_num].mem_mode;
+       mem_insn = gen_rtx_MEM (mem_mode1, plus_insn);
+       if (loop_insns_info[insn_num].extend != NIL)
+       { 
+         ext_mode = loop_insns_info[insn_num].extend_mode;
+         zero_insn = gen_rtx_ZERO_EXTEND (ext_mode, mem_insn);
+         emit_move_insn(zero_insn, SET_SRC (set));
+       }
+       else
+       {
+         emit_move_insn (mem_insn, SET_SRC (set));
+       }
+       seq = get_insns ();
+       end_sequence ();
+       emit_insn_after (seq, *insn);
+       return delete_insn (*insn);
+     }
+     return NULL_RTX;
+ }
+ 
+ 
+ /* INSN is of the form: 
+ 
+    reg = Mem [ addr + iv ] (or store) 
+ 
+    which is split into two new insns:
+ 
+    new_reg = addr + iv
+    reg = Mem [new_reg]  */
+ 
+ static rtx
+ split_insn_iv (struct loop_insn_info *loop_insns_info, int insn_num, 
+                rtx *orig_insn)
+ {
+   rtx plus_insn, seq, set, targ, insn2 = NULL_RTX;
+   enum machine_mode mem_mode1, ext_mode;
+ 
+   if (loop_insns_info[insn_num].splitt_side == load)
+     {
+       start_sequence ();
+       set = single_set (*orig_insn);
+       if (!set)
+         abort();
+       if (loop_insns_info[insn_num].extend != NIL)
+       {
+           targ = gen_reg_rtx (GET_MODE (XEXP (XEXP (SET_SRC(set), 
0),0)));
+           plus_insn  = copy_rtx (XEXP (XEXP (SET_SRC(set), 0),0));
+       }
+       else
+       {
+           targ = gen_reg_rtx (GET_MODE (XEXP (SET_SRC(set), 0)));
+           plus_insn = copy_rtx (XEXP (SET_SRC(set), 0));
+       }
+       emit_move_insn (targ , plus_insn);
+       mem_mode1 = loop_insns_info[insn_num].mem_mode;
+       insn2 = gen_rtx_MEM (mem_mode1, targ);
+       if (loop_insns_info[insn_num].extend != NIL)
+       {
+         ext_mode = loop_insns_info[insn_num].extend_mode;
+         insn2 = gen_rtx_ZERO_EXTEND (ext_mode,insn2);
+       }
+       emit_move_insn (SET_DEST(set), insn2);
+       seq = get_insns ();
+       end_sequence ();
+       emit_insn_after (seq, *orig_insn);
+       loop_insns_info[insn_num].new_addr = targ;
+       return delete_insn (*orig_insn);
+     }
+   if (loop_insns_info[insn_num].splitt_side == store)
+     {
+       start_sequence ();
+       set = single_set (*orig_insn);
+       if (!set)
+       abort();
+       if (loop_insns_info[insn_num].extend != NIL)
+       {
+         targ = gen_reg_rtx (GET_MODE (XEXP (XEXP (SET_DEST(set), 0), 
0)));
+         plus_insn  = copy_rtx (XEXP (XEXP (SET_DEST(set), 0), 0));
+       }
+       else
+       {
+         targ = gen_reg_rtx (GET_MODE (XEXP (SET_DEST (set), 0)));
+         plus_insn = copy_rtx (XEXP ( SET_DEST (set), 0));
+       }
+       emit_move_insn (targ , plus_insn);
+       mem_mode1 = loop_insns_info[insn_num].mem_mode;
+       insn2 = gen_rtx_MEM (mem_mode1, targ);
+       if (loop_insns_info[insn_num].extend != NIL)
+       {
+         ext_mode = loop_insns_info[insn_num].extend_mode;
+         insn2 = gen_rtx_ZERO_EXTEND (ext_mode,insn2);
+       }
+       emit_move_insn (insn2, SET_SRC (set));
+       seq = get_insns ();
+       end_sequence ();
+       emit_insn_after (seq, *orig_insn);
+       loop_insns_info[insn_num].new_addr = targ;
+       return delete_insn (*orig_insn);
+    }
+   return NULL_RTX;
+ }
+ 
+ 
+ /* Return 1 if INSN can be split into two new insns in the first
+    new copy of the unrolled loop. 
+    We are looking for an insn of the form: (load and store)
+ 
+    reg = Mem [ addr + iv ] 
+ 
+    which will be split into two new insns:
+ 
+    new_reg = addr + iv
+    reg = Mem [new_reg]
+ 
+    Splitting the insn in such a way makes it possible to increment addr
+    with a proper offset in the next unrolled iterations without the 
+    dependency on the induction variable.  */
+ 
+ static bool
+ can_split_insn(struct loop_insn_info* loop_insns_info, 
+                struct loop_reg_info* loop_regs_info,
+                rtx* insn, int insn_num)
+ {
+   rtx set;
+   enum rtx_code code1, code2;
+ 
+   if (!*insn)
+     return false;
+   if (GET_CODE (*insn) == PARALLEL)
+     return false;
+    set = single_set (*insn);
+   if (!set)
+     return false;
+ 
+   code1 = GET_CODE (SET_DEST (set));
+   code2 = GET_CODE (SET_SRC (set));
+ 
+   /*  reg = mem [ addr + iv ] */
+   if ((code1 == REG || code1 == SUBREG) && code2 == MEM )
+     {
+       if (is_valid_expression (loop_insns_info, loop_regs_info,
+                            XEXP (SET_SRC (set), 0), insn_num, *insn))
+       {
+         loop_insns_info[insn_num].mem_mode = 
+           GET_MODE (SET_SRC (set));
+         loop_insns_info[insn_num].splitt_side = load;
+         return true;
+       }
+     }
+ 
+   /* mem [ addr + iv ] = reg */
+   if (code1 == MEM && (code2 == REG || code2 == SUBREG))
+     {
+       if (is_valid_expression(loop_insns_info,loop_regs_info,
+                            XEXP (SET_DEST (set), 0),insn_num, *insn))
+       {
+         loop_insns_info[insn_num].mem_mode = 
+           GET_MODE (SET_DEST (set));
+         loop_insns_info[insn_num].splitt_side = store;
+         return true;
+       }
+     }
+ 
+   /* zero_extend (mem [ addr + iv ]) = reg */
+   if (code1 == ZERO_EXTEND && code2 == REG)
+     {
+       if (GET_CODE (XEXP (SET_DEST (set), 0)) != MEM )
+       return false;
+ 
+       if (is_valid_expression (loop_insns_info, loop_regs_info,
+                            XEXP (XEXP (SET_DEST (set), 0), 0), insn_num, 
*insn))
+       {
+         loop_insns_info[insn_num].mem_mode = 
+           GET_MODE (XEXP (SET_DEST (set), 0));
+         loop_insns_info[insn_num].splitt_side = store;
+         loop_insns_info[insn_num].extend = ZERO_EXTEND;
+         loop_insns_info[insn_num].extend_mode = 
+           GET_MODE (SET_DEST (set));
+         return true;
+       }
+     }
+ 
+   /* reg = zero_extend (mem [ addr + iv ]) */
+   if (code1 == REG && code2 == ZERO_EXTEND)
+     {
+       if( GET_CODE (XEXP (SET_SRC (set), 0)) != MEM )
+       return false;
+ 
+       if (is_valid_expression (loop_insns_info,loop_regs_info,
+                            XEXP (XEXP (SET_SRC (set), 0), 0),insn_num, 
*insn))
+       {
+         loop_insns_info[insn_num].mem_mode = 
+           GET_MODE (XEXP (SET_SRC (set), 0));
+         loop_insns_info[insn_num].splitt_side = load;
+         loop_insns_info[insn_num].extend = ZERO_EXTEND;
+         loop_insns_info[insn_num].extend_mode = 
+           GET_MODE (SET_SRC (set));
+         return true;
+       }
+     }
+ 
+   return false;
+ }
+ 
+ /* Return 1 if X is of the form [addr + reg], where
+    reg is an iv. 
+    This check is required before splitting an insn.  */
+ static bool
+ is_valid_expression (struct loop_insn_info *loop_insns_info, 
+                      struct loop_reg_info *loop_regs_info, rtx x, 
+                      int insn_num, rtx insn)
+ {
+   enum rtx_code code, code1, code2;
+   int regno1, regno2;
+   bool loop_invariant_field;
+   rtx op1, op2, insn1;
+   enum iv_status status;
+   struct rtx_iv iv;
+ 
+   if (x == 0)
+     return false;
+ 
+   code = GET_CODE (x);
+   if (code != PLUS)
+     return false;
+   op1 = XEXP (x, 0);
+   code1 = GET_CODE (op1);
+   op2 = XEXP (x, 1);
+   code2 = GET_CODE (op2);
+   if (code1 != REG || code2 != REG)
+     return false;
+   regno1 = REGNO (op1);
+   regno2 = REGNO (op2);
+ 
+   /* addr + giv.  */
+   loop_invariant_field = loop_regs_info[regno1].is_loop_invariant;
+   status = loop_regs_info[regno2].iv;
+   if ((status == shift_giv) && loop_invariant_field)
+     {
+       insn1 = iv_get_reaching_def (insn, 
loop_regs_info[regno2].biv_reg);
+       if (iv_analyze (insn1, loop_regs_info[regno2].biv_reg, &iv)
+         && !iv.first_special)
+       {
+         loop_insns_info[insn_num].stride = copy_rtx (iv.step);
+         loop_insns_info[insn_num].incr_address = 
+           loop_regs_info[regno2].shift_factor;
+         return true;
+       }
+     }
+ 
+   /* addr + biv.   */
+   loop_invariant_field = loop_regs_info[regno1].is_loop_invariant;
+   insn1 = iv_get_reaching_def (insn, op2);
+   if (iv_analyze (insn1, op2, &iv)
+       && is_biv (regno2) 
+       && !iv.first_special
+       && loop_invariant_field)
+     {
+       loop_insns_info[insn_num].stride = copy_rtx (iv.step); 
+       loop_insns_info[insn_num].incr_address = 0;
+       return true;
+     }
+ 
+   /* giv + addr.  */
+   loop_invariant_field = loop_regs_info[regno2].is_loop_invariant;
+   status = loop_regs_info[regno1].iv;
+   if (loop_invariant_field && (status == shift_giv))
+     {
+       insn1 = iv_get_reaching_def (insn, 
loop_regs_info[regno1].biv_reg);
+       if (iv_analyze (insn1, loop_regs_info[regno1].biv_reg, &iv)
+         && !iv.first_special)
+       {
+         loop_insns_info[insn_num].stride = copy_rtx (iv.step);
+         loop_insns_info[insn_num].incr_address = 
+           loop_regs_info[regno1].shift_factor;
+         return true;
+       }
+     }
+ 
+   /* biv + addr.  */
+   loop_invariant_field = loop_regs_info[regno2].is_loop_invariant;
+   insn1 = iv_get_reaching_def (insn, op1);
+   if (iv_analyze (insn1, op1, &iv)
+       && is_biv (regno1) 
+       && !iv.first_special
+       && loop_invariant_field)
+     {
+       loop_insns_info[insn_num].stride = copy_rtx (iv.step);
+       loop_insns_info[insn_num].incr_address = 0;
+       return true;
+     }
+ 
+   return false;
  }
 
  #include "gt-cfglayout.h"
Index: cfglayout.h
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/cfglayout.h,v
retrieving revision 1.10
diff -c -3 -p -r1.10 cfglayout.h
*** cfglayout.h 30 Dec 2003 10:40:51 -0000      1.10
--- cfglayout.h 7 Apr 2004 06:23:41 -0000
***************
*** 18,23 ****
--- 18,86 ----
     Software Foundation, 59 Temple Place - Suite 330, Boston, MA
     02111-1307, USA.  */
 
+ 
+ enum iv_status {shift_giv, non};
+ 
+ struct loop_reg_info
+ {
+   enum iv_status iv;
+   int shift_factor;
+   bool is_loop_invariant;
+   /* In case the reg is a shift_giv, the next field
+      holds its biv.  */
+   rtx biv_reg;
+ };
+ 
+ #define RESET_LOOP_REG_INFO(REG_TABLE, SIZE_OF_TABLE)                 \
+   {            \
+     int i;                          \
+     for (i=0; i<(SIZE_OF_TABLE) ;i++)          \
+       {                          \
+       (REG_TABLE)[i].iv = non;          \
+       (REG_TABLE)[i].shift_factor = -1;                          \
+       (REG_TABLE)[i].is_loop_invariant = true;          \
+       (REG_TABLE)[i].biv_reg = NULL_RTX;          \
+       }                          \
+   }
+ 
+ enum side {store,load,unknown};
+ 
+ /* This structure is used to hold information about the insns
+    in a loop and accessed via insn's serial number in the loop
+    insns chain.  */
+ 
+ struct loop_insn_info
+ {
+   /* Is_splittable_insn is true if the addressing
+      optimization can be applied on this insn.  */
+   bool is_splittable_insn;
+   enum side  splitt_side;
+   rtx  new_addr;
+   enum machine_mode mem_mode;
+   int  incr_address;
+   enum rtx_code extend;
+   enum machine_mode extend_mode;
+   rtx stride;
+ };
+ 
+ #define RESET_LOOP_INSN_INFO(INSN_TABLE, SIZE_OF_TABLE)               \
+   {          \
+     unsigned i;                          \
+     for (i=0;i<(SIZE_OF_TABLE);i++)          \
+       {                          \
+       (INSN_TABLE)[i].is_splittable_insn = false;          \
+       (INSN_TABLE)[i].new_addr = 0;          \
+       (INSN_TABLE)[i].splitt_side = unknown;          \
+       (INSN_TABLE)[i].mem_mode = VOIDmode;  \
+       (INSN_TABLE)[i].incr_address = -1;          \
+       (INSN_TABLE)[i].extend = NIL;          \
+       (INSN_TABLE)[i].extend_mode = VOIDmode;  \
+       (INSN_TABLE)[i].stride = NULL_RTX;          \
+       }                          \
+   }          \
+ 
+ 
+ 
  /* Structure to hold information about the blocks during reordering.  */
  typedef struct reorder_block_def
  {
*************** extern void insn_locators_initialize (vo
*** 43,47 ****
  extern void reemit_insn_block_notes (void);
  extern bool can_copy_bbs_p (basic_block *, unsigned);
  extern void copy_bbs (basic_block *, unsigned, basic_block *,
!                     edge *, unsigned, edge *, struct loop *);
  extern void cfg_layout_initialize_rbi (basic_block);
--- 106,112 ----
  extern void reemit_insn_block_notes (void);
  extern bool can_copy_bbs_p (basic_block *, unsigned);
  extern void copy_bbs (basic_block *, unsigned, basic_block *,
!                     edge *, unsigned, edge *, struct loop *,
!                     struct loop_reg_info *, struct loop_insn_info *,
!                     unsigned, unsigned);
  extern void cfg_layout_initialize_rbi (basic_block);
Index: cfgloop.h
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/cfgloop.h,v
retrieving revision 1.17
diff -c -3 -p -r1.17 cfgloop.h
*** cfgloop.h   18 Mar 2004 16:42:30 -0000      1.17
--- cfgloop.h   7 Apr 2004 06:23:41 -0000
*************** extern rtx get_iv_value (struct rtx_iv *
*** 392,397 ****
--- 392,398 ----
  extern void find_simple_exit (struct loop *, struct niter_desc *);
  extern void iv_number_of_iterations (struct loop *, rtx, rtx,
                                     struct niter_desc *);
+ extern bool is_biv (int);
  extern void iv_analysis_done (void);
 
  extern struct niter_desc *get_simple_loop_desc (struct loop *loop);
Index: cfgloopmanip.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/cfgloopmanip.c,v
retrieving revision 1.23
diff -c -3 -p -r1.23 cfgloopmanip.c
*** cfgloopmanip.c      24 Feb 2004 23:39:54 -0000      1.23
--- cfgloopmanip.c      7 Apr 2004 06:23:42 -0000
*************** static void scale_bbs_frequencies (basic
*** 50,55 ****
--- 50,65 ----
  static basic_block create_preheader (struct loop *, int);
  static void fix_irreducible_loops (basic_block);
 
+ static unsigned count_insn_in_bb (basic_block );
+ static unsigned count_insn_in_bbs (basic_block *, unsigned );
+ static void mark_modified_reg (struct loop_reg_info * , rtx);
+ static void analyze_loop_invariants (struct loop_reg_info *, 
+                                      basic_block * , int);
+ static void find_shift_pattern_in_loop (struct loop *, 
+                                         struct loop_reg_info *, 
+                                         basic_block *, int);
+ static void find_shift_giv (struct loop_reg_info * ,rtx);
+ 
  /* Splits basic block BB after INSN, returns created edge.  Updates 
loops
     and dominators.  */
  edge
*************** duplicate_loop_to_header_edge (struct lo
*** 838,843 ****
--- 848,858 ----
    basic_block new_bb, bb, first_active_latch = NULL;
    edge ae, latch_edge;
    edge spec_edges[2], new_spec_edges[2];
+ 
+   struct loop_insn_info* loop_insns_info = 0;
+   struct loop_reg_info* loop_regs_info = 0;
+   unsigned count = 0;
+ 
  #define SE_LATCH 0
  #define SE_ORIG 1
    unsigned i, j, n;
*************** duplicate_loop_to_header_edge (struct lo
*** 960,965 ****
--- 975,989 ----
    if (orig && TEST_BIT (wont_exit, 0))
      to_remove[(*n_to_remove)++] = orig;
 
+   count = count_insn_in_bbs (bbs,n);
+   loop_insns_info = xcalloc (count, sizeof (struct loop_insn_info));
+   loop_regs_info = xcalloc (max_reg_num(), sizeof (struct 
loop_reg_info));
+   RESET_LOOP_REG_INFO (loop_regs_info, max_reg_num());
+   RESET_LOOP_INSN_INFO (loop_insns_info, count);
+   analyze_loop_invariants (loop_regs_info, bbs, n);
+   iv_analysis_loop_init (loop);
+   find_shift_pattern_in_loop (loop, loop_regs_info, bbs, n);
+ 
    spec_edges[SE_ORIG] = orig;
    spec_edges[SE_LATCH] = latch_edge;
 
*************** duplicate_loop_to_header_edge (struct lo
*** 969,975 ****
        copy_loops_to (loops, orig_loops, n_orig_loops, target);
 
        /* Copy bbs.  */
!       copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop);
 
        /* Note whether the blocks and edges belong to an irreducible 
loop.  */
        if (add_irreducible_flag)
--- 993,1000 ----
        copy_loops_to (loops, orig_loops, n_orig_loops, target);
 
        /* Copy bbs.  */
!       copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
!               loop_regs_info,loop_insns_info, j, ndupl);
 
        /* Note whether the blocks and edges belong to an irreducible 
loop.  */
        if (add_irreducible_flag)
*************** duplicate_loop_to_header_edge (struct lo
*** 1064,1070 ****
    free (first_active);
 
    free (bbs);
! 
    return true;
  }
 
--- 1089,1098 ----
    free (first_active);
 
    free (bbs);
! 
!   free(loop_insns_info);
!   free(loop_regs_info);
!   iv_analysis_done ();
    return true;
  }
 
*************** loop_split_edge_with (edge e, rtx insns)
*** 1234,1236 ****
--- 1262,1488 ----
 
    return new_bb;
  }
+ 
+ 
+ /* Count the number of insns in BB.  */
+ static unsigned
+ count_insn_in_bb (basic_block bb)
+ {
+   rtx insn;
+   unsigned count = 0;
+   for (insn = ((bb)->head_) ; insn != NEXT_INSN ((bb)->end_);
+        insn = (NEXT_INSN (insn)))
+     {
+       count++;
+     }
+   return count;
+ }
+ 
+ /* Count the number of insns in BBS, where N is the number of basic 
blocks
+    in BBS. */
+ static unsigned
+ count_insn_in_bbs (basic_block *bbs, unsigned n)
+ {
+   unsigned num = 0;
+   unsigned i;
+ 
+   for (i = 0; i < n; i++)
+     { 
+       num += count_insn_in_bb (bbs[i]);
+     }
+   return num;
+ }
+ 
+ /* if REG is set inside INSN, 
+    mark REG as a modified reg. */ 
+ static void
+ mark_modified_reg (struct loop_reg_info *loop_regs_info, rtx insn)
+ {
+   rtx pat, sub;
+   int regno;
+   int i;
+ 
+   if (!INSN_P (insn))
+     return;
+   pat = PATTERN (insn);
+   if (GET_CODE (pat) == PARALLEL)
+     {
+       for (i = 0; i < XVECLEN (pat, 0); i++)
+       {
+         sub = XVECEXP (pat, 0, i);
+         if (GET_CODE (sub) != SET)
+           continue;
+         if (GET_CODE (SET_DEST (sub)) == REG)
+           {
+             regno =  REGNO (SET_DEST (sub));
+             loop_regs_info[regno].is_loop_invariant = false;
+           }
+       }
+     }
+   if (GET_CODE (pat) == SET)
+     {
+       if(GET_CODE (SET_DEST (pat)) == REG)
+       {
+         regno =  REGNO (SET_DEST (pat));
+         loop_regs_info[regno].is_loop_invariant = false;
+       }
+     }
+   return;
+ }
+ 
+ /* Find the loop's invariants. 
+    In the initialization phase all regs are marked as 
+    invariant.  */
+ static void
+ analyze_loop_invariants (struct loop_reg_info* loop_regs_info, 
+                       basic_block* bbs, int bb_num)
+ {
+   rtx insn;
+   int i;
+   basic_block  bb;
+ 
+   for (i=0;i<bb_num;i++)
+     {
+       bb = bbs[i];
+       for(insn = bb->head_; insn != NEXT_INSN ((bb)->end_);
+         insn = NEXT_INSN (insn))
+       {
+         mark_modified_reg (loop_regs_info,insn);
+       }
+     }
+ }
+ 
+ 
+ /* Find 'simple' givs in LOOP and record them in 
+    LOOP_REG_INFO data structure. 
+ 
+    a 'simple' giv is defined as follows:
+ 
+    -- it is set once in the blocks that dominate the latch of LOOP.
+    -- it is the result of a shift operation on some biv. */ 
+ static void
+ find_shift_pattern_in_loop (struct loop* loop, 
+                           struct loop_reg_info* loop_regs_info, 
+                           basic_block* bbs, int bb_num)
+ {
+   rtx insn, sub, pat;
+   basic_block bb;
+   int i, k, regno;
+   rtx* single_set_regs;
+   int* number_times_set;
+ 
+   single_set_regs = xmalloc (max_reg_num () * sizeof (rtx));
+   number_times_set = xmalloc (max_reg_num () * sizeof (int));
+ 
+   for(i=0;i<max_reg_num ();i++)
+     {
+       number_times_set[i] = 0;
+       single_set_regs[i] = NULL_RTX;
+     }
+ 
+   /* First mark the regs in the loop that are set only once.  */
+ 
+   for (i=0;i<bb_num;i++)
+     {
+       bb = bbs[i];
+       for(insn = bb->head_; insn != NEXT_INSN ((bb)->end_);
+         insn = NEXT_INSN (insn))
+       {
+         if (!INSN_P (insn))
+           continue; 
+         pat = PATTERN (insn);
+         if (GET_CODE (pat) == PARALLEL)
+           {
+             for (k = 0; k < XVECLEN (pat, 0); k++)
+               {
+                 sub = XVECEXP (pat, 0, k);
+                 if(GET_CODE (sub) != SET || !REG_P (SET_DEST (sub)))
+                   continue;
+                 regno = REGNO (SET_DEST (sub));
+                 /* Do not support givs inside PARALLEL */
+                 number_times_set[REGNO (SET_DEST (sub))] += 2;
+               }
+             continue;
+           }
+         if (GET_CODE (pat) == SET)
+           {
+             if (!REG_P (SET_DEST (pat)))
+                        continue;
+             regno = REGNO (SET_DEST (pat));
+             single_set_regs[REGNO (SET_DEST (pat))] = insn;
+             number_times_set[REGNO (SET_DEST (pat))] += 1; 
+           }
+       }
+     }
+ 
+   /* Find the simple givs. */ 
+   for (i=0 ; i < max_reg_num () ; i++)
+     {
+       if (single_set_regs[i] == NULL)
+       continue;
+ 
+       if (number_times_set[i] == 1)
+       {
+         if(just_once_each_iteration_p (loop, BLOCK_FOR_INSN 
(single_set_regs[i])))
+           find_shift_giv (loop_regs_info, single_set_regs[i]);
+       }
+     }
+ 
+   free (single_set_regs);
+   free (number_times_set);
+ }
+ 
+ 
+ /* Find out if the INSN is of the form:
+    reg = shift (biv) .
+    If INSN is indeed of that form mark reg as giv
+    and record the shift factor.  */
+ static void
+ find_shift_giv (struct loop_reg_info* loop_regs_info, rtx insn)
+ {
+   enum rtx_code code;
+   rtx set, insn1;
+   int regno;
+   rtx x;
+   int dest_regno;
+   int factor;
+   struct rtx_iv iv;
+ 
+   code = GET_CODE (insn);
+   if (code != INSN)
+     return;
+   set = single_set (insn);
+   if (!set)
+     return;
+   if (GET_CODE (SET_DEST (set)) != REG)
+     return;
+   dest_regno = REGNO (SET_DEST (set));
+   x = SET_SRC (set);
+   code = GET_CODE (x);
+   if (code != ASHIFT)
+     return;
+   x = XEXP(SET_SRC (set),0);
+   code = GET_CODE (x);
+   if (code != REG)
+     return;
+   regno = REGNO (x);
+   insn1 = iv_get_reaching_def (insn, XEXP(SET_SRC (set),0));
+   if (!iv_analyze (insn1, XEXP(SET_SRC (set),0), &iv)
+       || !is_biv (regno) 
+       || iv.first_special)
+     return;
+   x = XEXP (SET_SRC (set), 1);
+   code = GET_CODE (x);
+   if (code != CONST_INT)
+     return;
+   factor = XWINT (x,0);
+ 
+   loop_regs_info[dest_regno].iv = shift_giv;
+   loop_regs_info[dest_regno].shift_factor = factor;
+   loop_regs_info[dest_regno].biv_reg = XEXP (SET_SRC (set),0);
+ 
+   return;
+ }
+ 
+ 
+ 
Index: loop-iv.c
===================================================================
RCS file: /cvsroot/gcc/gcc/gcc/loop-iv.c,v
retrieving revision 2.7
diff -c -3 -p -r2.7 loop-iv.c
*** loop-iv.c   18 Mar 2004 16:42:31 -0000      2.7
--- loop-iv.c   7 Apr 2004 06:23:42 -0000
*************** free_simple_loop_desc (struct loop *loop
*** 2466,2468 ****
--- 2466,2479 ----
    free (desc);
    loop->aux = NULL;
  }
+ 
+ bool
+ is_biv (int regno)
+ {
+   if (!bivs)
+     return false;
+   if(!bivs[regno].analysed)
+     return false;
+   return true;
+ }
+ 



More information about the Gcc-patches mailing list