This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Fw: [PATCH] Unrolling addressing optimization
Hello,
> Attached is the patch updated for the current mainline.
> The patch is adjusted to Zdenek Dvorak patch for handling
> reg_equal/equiv notes.
> (http://gcc.gnu.org/ml/gcc-patches/2004-04/msg01740.html)
>
> Bootstrapped & regression tests on POWER4.
starts to look better. There are lot of coding style issues (no spaces
around operators, wrong indentation, etc.) Just few formal changes:
> Index: cfglayout.c
> ===================================================================
> RCS file: /cvsroot/gcc/gcc/gcc/cfglayout.c,v
> retrieving revision 1.59
> diff -c -3 -p -r1.59 cfglayout.c
> *** cfglayout.c 25 May 2004 12:54:53 -0000 1.59
> --- cfglayout.c 8 Jun 2004 11:00:41 -0000
> *************** end:
> *** 1248,1254 ****
> void
> copy_bbs (basic_block *bbs, unsigned n, basic_block *new_bbs,
> edge *edges, unsigned n_edges, edge *new_edges,
> ! struct loop *base)
> {
> unsigned i, j;
> basic_block bb, new_bb, dom_bb;
> --- 1248,1257 ----
> void
> copy_bbs (basic_block *bbs, unsigned n, basic_block *new_bbs,
> edge *edges, unsigned n_edges, edge *new_edges,
> ! struct loop *base,
> ! opt_function1 opt_func, void *loop_insn_data,
coding style.
> ! unsigned iter, unsigned ndupl, bool modify_original_loop,
> ! bool is_original_loop_in_latch, bool is_unrolling, rtx *reg_map)
Remove all these arguments and pass them to opt_func via void *data
pointer, as done in all other callbacks in gcc.
> {
> unsigned i, j;
> basic_block bb, new_bb, dom_bb;
> *************** copy_bbs (basic_block *bbs, unsigned n,
> *** 1260,1265 ****
> --- 1263,1280 ----
> /* Duplicate. */
> bb = bbs[i];
> new_bb = new_bbs[i] = duplicate_block (bb, NULL);
> + /* Eliminate the use of induction variables in address access. */
This comment is not appropriate here. This is a generic function, not
specific to your optimization.
> + if (opt_func != NULL
============
> + && loop_insn_data != NULL)
> + {
> + if (just_once_each_iteration_p (base, bbs[i]))
> + {
> + new_bb = (*opt_func) (loop_insn_data, i, iter+1, &new_bbs[i],
> + &bbs[i], ndupl-iter, modify_original_loop,
> + is_original_loop_in_latch, is_unrolling,
> + base, reg_map);
> + }
> + }
==============
this all should be inside the opt_func.
> bb->rbi->duplicated = 1;
> /* Add to loop. */
> add_bb_to_loop (new_bb, bb->loop_father->copy);
> Index: cfglayout.h
> ===================================================================
> RCS file: /cvsroot/gcc/gcc/gcc/cfglayout.h,v
> retrieving revision 1.14
> diff -c -3 -p -r1.14 cfglayout.h
> *** cfglayout.h 3 Jun 2004 15:01:08 -0000 1.14
> --- cfglayout.h 8 Jun 2004 11:00:41 -0000
> ***************
> *** 18,23 ****
> --- 18,68 ----
> Software Foundation, 59 Temple Place - Suite 330, Boston, MA
> 02111-1307, USA. */
>
> + typedef basic_block (*opt_function1) (void *, int, unsigned,
> + basic_block *, basic_block *,
> + unsigned, bool, bool, bool,
> + struct loop *, rtx *);
This is exactly the same as opt_function.
> + enum side {store,load,unknown};
> +
> + /* This structure is used to hold information about the insns
> + in a loop and accessed via insn's serial number in the loop
> + insns chain. */
> +
> + struct loop_insn_info
> + {
> + /* is_splittable_insn is true if the addressing
> + optimization can be applied on this insn. */
> + bool is_splittable_insn;
> + enum side splitt_side;
> + rtx new_addr;
> + enum machine_mode mem_mode;
> + enum rtx_code extend;
> + enum machine_mode extend_mode;
> + rtx stride;
> + rtx iv_reg;
> + rtx *orig_insn;
> + };
> +
> + #define RESET_LOOP_INSN_INFO(INSN_TABLE, SIZE_OF_TABLE) \
> + { \
> + unsigned i; \
> + for (i=0 ;i < (SIZE_OF_TABLE); i++) \
> + { \
> + (INSN_TABLE)[i].is_splittable_insn = false; \
> + (INSN_TABLE)[i].new_addr = NULL_RTX; \
> + (INSN_TABLE)[i].splitt_side = unknown; \
> + (INSN_TABLE)[i].mem_mode = VOIDmode; \
> + (INSN_TABLE)[i].extend = NIL; \
> + (INSN_TABLE)[i].extend_mode = VOIDmode; \
> + (INSN_TABLE)[i].iv_reg = NULL_RTX; \
> + (INSN_TABLE)[i].stride = NULL_RTX; \
> + (INSN_TABLE)[i].orig_insn = NULL; \
> + } \
> + } \
> +
> +
> +
These are specific to your optimization, and therefore they should not
be declared in a header, but just in the file where you define the
functions that use them.
> #ifndef GCC_CFGLAYOUT_H
> #define GCC_CFGLAYOUT_H
and definitely you should not place anything in the headers before this
#ifndef.
> *************** extern void insn_locators_initialize (vo
> *** 31,37 ****
> extern void reemit_insn_block_notes (void);
> extern bool can_copy_bbs_p (basic_block *, unsigned);
> extern void copy_bbs (basic_block *, unsigned, basic_block *,
> ! edge *, unsigned, edge *, struct loop *);
> extern bool scan_ahead_for_unlikely_executed_note (rtx);
> extern rtx duplicate_insn_chain (rtx, rtx);
>
> --- 76,84 ----
> extern void reemit_insn_block_notes (void);
> extern bool can_copy_bbs_p (basic_block *, unsigned);
> extern void copy_bbs (basic_block *, unsigned, basic_block *,
> ! edge *, unsigned, edge *, struct loop *,
> ! opt_function1, void *, unsigned, unsigned,
> ! bool, bool, bool, rtx *);
> extern bool scan_ahead_for_unlikely_executed_note (rtx);
> extern rtx duplicate_insn_chain (rtx, rtx);
>
> Index: cfgloop.h
> ===================================================================
> RCS file: /cvsroot/gcc/gcc/gcc/cfgloop.h,v
> retrieving revision 1.19
> diff -c -3 -p -r1.19 cfgloop.h
> *** cfgloop.h 3 Jun 2004 15:01:08 -0000 1.19
> --- cfgloop.h 8 Jun 2004 11:00:41 -0000
> *************** along with GCC; see the file COPYING. I
> *** 19,24 ****
> --- 19,31 ----
> Software Foundation, 59 Temple Place - Suite 330, Boston, MA
> 02111-1307, USA. */
>
> +
> + typedef basic_block (*opt_function) (void *, int, unsigned,
> + basic_block *, basic_block *,
> + unsigned, bool, bool, bool,
> + struct loop *, rtx *);
> +
> +
> #ifndef GCC_CFGLOOP_H
> #define GCC_CFGLOOP_H
>
> *************** extern bool can_duplicate_loop_p (struct
> *** 297,307 ****
>
> extern int duplicate_loop_to_header_edge (struct loop *, edge, struct loops *,
> unsigned, sbitmap, edge, edge *,
> ! unsigned *, int);
> extern struct loop *loopify (struct loops *, edge, edge, basic_block);
> extern void unloop (struct loops *, struct loop *);
> extern bool remove_path (struct loops *, edge);
> extern edge split_loop_bb (basic_block, rtx);
>
> /* Induction variable analysis. */
>
> --- 304,320 ----
>
> extern int duplicate_loop_to_header_edge (struct loop *, edge, struct loops *,
> unsigned, sbitmap, edge, edge *,
> ! unsigned *, int,
> ! opt_function, void *, bool, bool,
> ! bool, rtx *);
> extern struct loop *loopify (struct loops *, edge, edge, basic_block);
> extern void unloop (struct loops *, struct loop *);
> extern bool remove_path (struct loops *, edge);
> extern edge split_loop_bb (basic_block, rtx);
> + extern basic_block split_ivs_during_unrolling (void *, int, unsigned,
> + basic_block *, basic_block *,
> + unsigned, bool, bool, bool,
> + struct loop *, rtx *);
It should not be neccessary to export split_ivs_during_unrolling.
> /* Induction variable analysis. */
>
> Index: cfgloopmanip.c
> ===================================================================
> RCS file: /cvsroot/gcc/gcc/gcc/cfgloopmanip.c,v
> retrieving revision 1.26
> diff -c -3 -p -r1.26 cfgloopmanip.c
> *** cfgloopmanip.c 13 May 2004 06:39:32 -0000 1.26
> --- cfgloopmanip.c 8 Jun 2004 11:00:42 -0000
> *************** Software Foundation, 59 Temple Place - S
> *** 28,33 ****
> --- 28,34 ----
> #include "cfgloop.h"
> #include "cfglayout.h"
> #include "output.h"
> + #include "expr.h"
>
> static struct loop * duplicate_loop (struct loops *, struct loop *,
> struct loop *);
> *************** static void scale_bbs_frequencies (basic
> *** 50,55 ****
> --- 51,68 ----
> static basic_block create_preheader (struct loop *, int);
> static void fix_irreducible_loops (basic_block);
>
> + static rtx split_insn_iv (struct loop_insn_info *, int, rtx *, bool, struct loop *,
> + rtx *);
> + static rtx update_splitted_insn (struct loop_insn_info *, int, int, rtx *, bool, unsigned);
> + static bool is_valid_expression (struct loop_insn_info *,
> + rtx, int, basic_block *, int);
> + static bool can_split_insn (struct loop_insn_info *, rtx *, int, basic_block *, int);
> + void gen_new_addr (struct loop_insn_info *, int, rtx *);
> + static rtx* get_original_insn (basic_block *, int);
> + rtx emit_exp (rtx);
> +
> +
> +
These functions are specific to your optimization and only applicable for
unrolling, so I believe you should move them to loop-unroll.c.
> Index: loop-iv.c
> ===================================================================
> RCS file: /cvsroot/gcc/gcc/gcc/loop-iv.c,v
> retrieving revision 2.8
> diff -c -3 -p -r2.8 loop-iv.c
> *** loop-iv.c 19 May 2004 17:53:45 -0000 2.8
> --- loop-iv.c 8 Jun 2004 11:00:43 -0000
> *************** simple_set_p (rtx lhs, rtx rhs)
> *** 222,227 ****
> --- 222,228 ----
> case PLUS:
> case MINUS:
> case MULT:
> + case ASHIFT:
> op0 = XEXP (rhs, 0);
> op1 = XEXP (rhs, 1);
>
> *************** simple_set_p (rtx lhs, rtx rhs)
> *** 238,243 ****
> --- 239,248 ----
> && !CONSTANT_P (op1))
> return false;
>
> + if (GET_CODE (rhs) == ASHIFT
> + && CONSTANT_P (op0))
> + return false;
> +
> return true;
>
> default:
> *************** iv_mult (struct rtx_iv *iv, rtx mby)
> *** 589,594 ****
> --- 594,623 ----
> return true;
> }
>
> + static bool
> + iv_shift (struct rtx_iv *iv, rtx mby)
> + {
> + enum machine_mode mode = iv->extend_mode;
> +
> + if (GET_MODE (mby) != VOIDmode
> + && GET_MODE (mby) != mode)
> + return false;
> +
> + if (iv->extend == NIL)
> + {
> + iv->base = simplify_gen_binary (ASHIFT, mode, iv->base, mby);
> + iv->step = simplify_gen_binary (ASHIFT, mode, iv->step, mby);
> + }
> + else
> + {
> + iv->delta = simplify_gen_binary (ASHIFT, mode, iv->delta, mby);
> + iv->mult = simplify_gen_binary (ASHIFT, mode, iv->mult, mby);
> + }
> +
> + return true;
> + }
> +
> +
> /* The recursive part of get_biv_step. Gets the value of the single value
> defined in INSN wrto initial value of REG inside loop, in shape described
> at get_biv_step. */
> *************** iv_analyze (rtx insn, rtx def, struct rt
> *** 1032,1037 ****
> --- 1061,1073 ----
> mby = tmp;
> }
> break;
> +
> + case ASHIFT:
> + if (CONSTANT_P (XEXP (rhs, 0)))
> + abort ();
> + op0 = XEXP (rhs, 0);
> + mby = XEXP (rhs, 1);
> + break;
>
> default:
> abort ();
> *************** iv_analyze (rtx insn, rtx def, struct rt
> *** 1088,1093 ****
> --- 1124,1134 ----
> goto end;
> break;
>
> + case ASHIFT:
> + if (!iv_shift (&iv0, mby))
> + goto end;
> + break;
> +
> default:
> break;
> }
Please submit the loop-iv.c part as a separate patch, it is completely independent
on the rest and useful by itself.
Zdenek