? gcc/trace_strides.c Index: gcc/Makefile.in =================================================================== RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v retrieving revision 1.903.2.153 diff -c -3 -p -r1.903.2.153 Makefile.in *** gcc/Makefile.in 8 Dec 2003 13:52:58 -0000 1.903.2.153 --- gcc/Makefile.in 11 Dec 2003 23:37:34 -0000 *************** STAGESTUFF = *$(objext) insn-flags.h ins *** 935,941 **** $(LANG_STAGESTUFF) # Defined in libgcc2.c, included only in the static library. ! LIB2FUNCS_ST = _eprintf __gcc_bcmp # Defined in libgcov.c, included only in gcov library LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta --- 935,941 ---- $(LANG_STAGESTUFF) # Defined in libgcc2.c, included only in the static library. ! LIB2FUNCS_ST = _eprintf __gcc_bcmp _pm # Defined in libgcov.c, included only in gcov library LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta *************** LIBGCC_DEPS = $(GCC_PASSES) $(LANGUAGES) *** 1199,1205 **** tsystem.h $(FPBIT) $(DPBIT) $(TPBIT) $(LIB2ADD) \ $(LIB2ADD_ST) $(LIB2ADDEH) $(LIB2ADDEHDEP) $(EXTRA_PARTS) \ $(srcdir)/config/$(LIB1ASMSRC) \ ! $(srcdir)/gcov-io.h $(srcdir)/gcov-io.c gcov-iov.h libgcov.a: libgcc.a; @true --- 1199,1206 ---- tsystem.h $(FPBIT) $(DPBIT) $(TPBIT) $(LIB2ADD) \ $(LIB2ADD_ST) $(LIB2ADDEH) $(LIB2ADDEHDEP) $(EXTRA_PARTS) \ $(srcdir)/config/$(LIB1ASMSRC) \ ! $(srcdir)/gcov-io.h $(srcdir)/gcov-io.c gcov-iov.h \ ! 
$(srcdir)/trace_strides.c libgcov.a: libgcc.a; @true Index: gcc/common.opt =================================================================== RCS file: /cvs/gcc/gcc/gcc/common.opt,v retrieving revision 1.14.2.13 diff -c -3 -p -r1.14.2.13 common.opt *** gcc/common.opt 6 Dec 2003 12:31:27 -0000 1.14.2.13 --- gcc/common.opt 11 Dec 2003 23:37:34 -0000 *************** fprefetch-loop-arrays *** 521,526 **** --- 521,534 ---- Common Generate prefetch instructions, if available, for arrays in loops + fprefetch-loop-strides + Common + Generate prefetch instructions, if available, for strided loads/stores in loops + + fprefetch-stores-only + Common + Generate prefetch instructions, if available, for strided stores in loops + fprofile Common Enable basic program profiling code Index: gcc/flags.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/flags.h,v retrieving revision 1.86.2.41 diff -c -3 -p -r1.86.2.41 flags.h *** gcc/flags.h 6 Dec 2003 12:31:27 -0000 1.86.2.41 --- gcc/flags.h 11 Dec 2003 23:37:34 -0000 *************** extern int flag_move_all_movables; *** 298,303 **** --- 298,313 ---- extern int flag_prefetch_loop_arrays; + /* Nonzero enables feedback based prefetch optimization for strided + loads/stores in loops. */ + + extern int flag_prefetch_loop_strides; + + /* Nonzero enables feedback based prefetch optimization for strided + stores in loops. */ + + extern int flag_prefetch_stores_only; + /* Nonzero forces all general induction variables in loops to be strength reduced. 
*/ Index: gcc/libgcc2.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/libgcc2.c,v retrieving revision 1.143.2.17 diff -c -3 -p -r1.143.2.17 libgcc2.c *** gcc/libgcc2.c 25 Nov 2003 02:09:49 -0000 1.143.2.17 --- gcc/libgcc2.c 11 Dec 2003 23:37:35 -0000 *************** __eprintf (const char *string, const cha *** 1475,1480 **** --- 1475,1484 ---- #endif + #ifdef L_pm + #include "trace_strides.c" + #endif + #ifdef L_clear_cache /* Clear part of an instruction cache. */ Index: gcc/loop.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/loop.c,v retrieving revision 1.410.2.28 diff -c -3 -p -r1.410.2.28 loop.c *** gcc/loop.c 25 Nov 2003 02:09:49 -0000 1.410.2.28 --- gcc/loop.c 11 Dec 2003 23:37:35 -0000 *************** Software Foundation, 59 Temple Place - S *** 71,77 **** #define SIMULTANEOUS_PREFETCHES 3 #endif #ifndef PREFETCH_BLOCK ! #define PREFETCH_BLOCK 32 #endif #ifndef HAVE_prefetch #define HAVE_prefetch 0 --- 71,77 ---- #define SIMULTANEOUS_PREFETCHES 3 #endif #ifndef PREFETCH_BLOCK ! #define PREFETCH_BLOCK 128 #endif #ifndef HAVE_prefetch #define HAVE_prefetch 0 *************** Software Foundation, 59 Temple Place - S *** 85,93 **** #define MAX_PREFETCHES 100 /* The number of prefetch blocks that are beneficial to fetch at once before a loop with a known (and low) iteration count. */ ! #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6 ! /* For very tiny loops it is not worthwhile to prefetch even before the loop, ! since it is likely that the data are already in the cache. */ #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2 /* Parameterize some prefetch heuristics so they can be turned on and off --- 85,94 ---- #define MAX_PREFETCHES 100 /* The number of prefetch blocks that are beneficial to fetch at once before a loop with a known (and low) iteration count. */ ! #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 3 ! /* For very tiny loops it is not worthwhile to prefetch even before ! 
the loop, since it is likely that the data are already in the ! cache. */ #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2 /* Parameterize some prefetch heuristics so they can be turned on and off *************** Software Foundation, 59 Temple Place - S *** 96,108 **** /* Prefetch is worthwhile only when loads/stores are dense. */ #ifndef PREFETCH_ONLY_DENSE_MEM ! #define PREFETCH_ONLY_DENSE_MEM 1 #endif ! /* Define what we mean by "dense" loads and stores; This value divided by 256 ! is the minimum percentage of memory references that worth prefetching. */ #ifndef PREFETCH_DENSE_MEM #define PREFETCH_DENSE_MEM 220 #endif /* Do not prefetch for a loop whose iteration count is known to be low. */ --- 97,111 ---- /* Prefetch is worthwhile only when loads/stores are dense. */ #ifndef PREFETCH_ONLY_DENSE_MEM ! #define PREFETCH_ONLY_DENSE_MEM 0 #endif ! /* Define what we mean by "dense" loads and stores; This value divided ! by 256 is the minimum percentage of memory references that worth ! prefetching. */ #ifndef PREFETCH_DENSE_MEM #define PREFETCH_DENSE_MEM 220 + #define PREFETCH_DENSE_MEM_PM 110 #endif /* Do not prefetch for a loop whose iteration count is known to be low. */ *************** Software Foundation, 59 Temple Place - S *** 123,129 **** /* Do not prefetch accesses with an extreme stride. */ #ifndef PREFETCH_NO_EXTREME_STRIDE ! #define PREFETCH_NO_EXTREME_STRIDE 1 #endif /* Define what we mean by an "extreme" stride. */ --- 126,132 ---- /* Do not prefetch accesses with an extreme stride. */ #ifndef PREFETCH_NO_EXTREME_STRIDE ! #define PREFETCH_NO_EXTREME_STRIDE 0 #endif /* Define what we mean by an "extreme" stride. */ *************** Software Foundation, 59 Temple Place - S *** 134,140 **** /* Define a limit to how far apart indices can be and still be merged into a single prefetch. */ #ifndef PREFETCH_EXTREME_DIFFERENCE ! 
#define PREFETCH_EXTREME_DIFFERENCE 4096 #endif /* Issue prefetch instructions before the loop to fetch data to be used --- 137,143 ---- /* Define a limit to how far apart indices can be and still be merged into a single prefetch. */ #ifndef PREFETCH_EXTREME_DIFFERENCE ! #define PREFETCH_EXTREME_DIFFERENCE 2048 #endif /* Issue prefetch instructions before the loop to fetch data to be used *************** Software Foundation, 59 Temple Place - S *** 145,151 **** /* Do not handle reversed order prefetches (negative stride). */ #ifndef PREFETCH_NO_REVERSE_ORDER ! #define PREFETCH_NO_REVERSE_ORDER 1 #endif /* Prefetch even if the GIV is in conditional code. */ --- 148,154 ---- /* Do not handle reversed order prefetches (negative stride). */ #ifndef PREFETCH_NO_REVERSE_ORDER ! #define PREFETCH_NO_REVERSE_ORDER 0 #endif /* Prefetch even if the GIV is in conditional code. */ *************** Software Foundation, 59 Temple Place - S *** 153,158 **** --- 156,232 ---- #define PREFETCH_CONDITIONAL 1 #endif + #define MAX_CANDIDATE_PM_INSNS 3000 + + /* The higher the number below, the more strict we are i.e. fewer + prefetch insns will pass. */ + + #define CORRECT_RATIO_THRESHOLD 0.24 + + #define DENSITY_THRESHOLD 0.24 + + /* The following number indicates the percentage of total issued loads for + profiling. E.g. if 0.2, greater than 20% of the total issued loads for + profiling. */ + + #define GLOBAL_DENSITY_THRESHOLD 0.05 + + /* Global variables for prefetching. 
*/ + + FILE *profile_info_file; + char line_buf[120]; + int do_once = 1; + int strides_pm_info_available = 0; + int my_fn_id = 0; + int pm_cand_cnt = 0; + int name_idx, name_length; + int tf_fn_id, pref_insn_id, mem_insn_id, num_invoked, num_correct; + int pm_idx; + int candidate_mem_insns[MAX_CANDIDATE_PM_INSNS]; + int correct_count_insns[MAX_CANDIDATE_PM_INSNS]; + int pm_num_invoked[MAX_CANDIDATE_PM_INSNS]; + int this_mem_insn; + rtx pm_init_call; + + /* Function local density threshold comparator */ + + #define LT_LDT(a,b,c) \ + ( ((((a * 1.0 / b *1.0)) < CORRECT_RATIO_THRESHOLD) && (a < b)) \ + || (((b * 1.0)/(c *1.0)) < DENSITY_THRESHOLD)) + + /* If either b or c is zero, disqualify the insn. If a > b and b/c > + density then qualify. */ + + #define LT_ACCURACY_THRESHOLD(a,b,c) \ + (!b || !c || LT_LDT(a,b,c)) + + /* Global density threshold. Return true if a/b is less than the + GLOBAL_DENSITY_THRESHOLD. */ + + #define LT_GDT(a,b) ( ((a * 1.0) / (b * 1.0)) < GLOBAL_DENSITY_THRESHOLD ) + + /* Following code uses start_sequence and end_sequence .. CARE must be + taken to observe this fact. 'addr' is an rtx register that will + hold the address of the issued prefetch. 'insn' is the prefetch + insn that will be evaluated for efficiency. It is the insn that + prefetches addr. The before_insn is where the generated call to the + instrumentation routine will be inserted. 
*/ + + #define emit_library_call_pm_collect_stats_before(fn_id,addr,insn,before_insn,ref_insn) \ + do{ \ + start_sequence(); \ + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, \ + "__pm_collect_stats"), \ + LCT_ALWAYS_RETURN,VOIDmode,4,GEN_INT (fn_id), \ + Pmode, addr, \ + Pmode,GEN_INT (INSN_UID(insn)),Pmode, \ + GEN_INT (INSN_UID(ref_insn)),Pmode); \ + pm_init_call = get_insns(); \ + end_sequence (); \ + emit_insn_before (pm_init_call,before_insn); \ + } while(0) + + #define LOOP_REG_LIFETIME(LOOP, REGNO) \ ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO))) *************** struct check_store_data *** 3622,3628 **** }; static void check_store (rtx, rtx, void *); ! static void emit_prefetch_instructions (struct loop *); static int rtx_equal_for_prefetch_p (rtx, rtx); /* Set mem_write when mem_address is found. Used as callback to --- 3696,3702 ---- }; static void check_store (rtx, rtx, void *); ! static void emit_prefetch_instructions (struct loop *, int); static int rtx_equal_for_prefetch_p (rtx, rtx); /* Set mem_write when mem_address is found. Used as callback to *************** remove_constant_addition (rtx *x) *** 3788,3794 **** controlled by defined symbols that can be overridden for each target. */ static void ! emit_prefetch_instructions (struct loop *loop) { int num_prefetches = 0; int num_real_prefetches = 0; --- 3862,3868 ---- controlled by defined symbols that can be overridden for each target. */ static void ! emit_prefetch_instructions (struct loop *loop, int flags) { int num_prefetches = 0; int num_real_prefetches = 0; *************** emit_prefetch_instructions (struct loop *** 3802,3812 **** struct prefetch_info info[MAX_PREFETCHES]; struct loop_ivs *ivs = LOOP_IVS (loop); if (!HAVE_prefetch) return; ! /* Consider only loops w/o calls. When a call is done, the loop is probably ! slow enough to read the memory. 
*/ if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call) { if (loop_dump_stream) --- 3876,3899 ---- struct prefetch_info info[MAX_PREFETCHES]; struct loop_ivs *ivs = LOOP_IVS (loop); + /* In order to preserve previous prefetching under + -fprefetch-loop-arrays I am adding these variables to replace + constant defines. This is not the best of things to do!! However + it is an easy hack. */ + + static int announce_once = 1; + int prefetch_dense_mem; + + if (flags & LOOP_PREFETCH_STRIDES) + prefetch_dense_mem = PREFETCH_DENSE_MEM_PM; + else + prefetch_dense_mem = PREFETCH_DENSE_MEM; + if (!HAVE_prefetch) return; ! /* Consider only loops without calls. When a call is done, the loop is ! probably slow enough to read the memory. */ if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call) { if (loop_dump_stream) *************** emit_prefetch_instructions (struct loop *** 4066,4071 **** --- 4153,4298 ---- if (info[i].write) num_real_write_prefetches += info[i].prefetch_in_loop; } + + if (flags & LOOP_PREFETCH_STRIDES) + { + static int total_collect = 0; + static int global_collect; + int pm_cand_idx; + name_length = strlen (cfun->name); + my_fn_id = 0; + + for (name_idx = 0; name_idx < (name_length-1); name_idx++) + my_fn_id += (unsigned) (cfun->name[name_idx]); + + this_mem_insn = INSN_UID (info[i].giv->insn); + if (do_once) + { + profile_info_file = fopen("TRACE_FILE","r"); + + if (profile_info_file) + { + int i; + + for (i = 0; i < MAX_CANDIDATE_PM_INSNS; i++) + { + pm_num_invoked[i] = 0; + correct_count_insns[i] = 0; + } + + total_collect = 0; + global_collect = 0; + pm_cand_cnt = 0; + strides_pm_info_available = 1; + + /* Read profiled information about insns that can be + prefetched. The format is as appears in + trace_strides.c __pm_print_trace_strides: fprintf + (TRACE_FILE, "fn:%d insn:%d ref_ld:%d invoked:%d + correct:%d\n"... 
+ */ + + while ((fscanf (profile_info_file, "%[^\n]\n", line_buf) + != EOF) + && (pm_cand_cnt < MAX_CANDIDATE_PM_INSNS )) + { + if (sscanf (line_buf, + "fn:%d insn:%d ref_ld:%d invoked:%d correct:%d", + &tf_fn_id, &pref_insn_id, &mem_insn_id, + &num_invoked, &num_correct) == 5) + { + /* Valid format use it, else ignore the line */ + + if (tf_fn_id == my_fn_id) + { + + /* find if we have an entry */ + for (pm_cand_idx = 0; + pm_cand_idx < pm_cand_cnt; + pm_cand_idx++) + { + if (candidate_mem_insns[pm_cand_idx] == + mem_insn_id) + { + if (pref_insn_id == mem_insn_id) + pm_num_invoked[pm_cand_idx] = + num_invoked; + correct_count_insns[pm_cand_idx] += + num_correct; + break; + } + } + + /* new entry, add it */ + if (pm_cand_idx == pm_cand_cnt) + { + pm_num_invoked[pm_cand_cnt] = num_invoked; + correct_count_insns[pm_cand_cnt] = + num_correct; + candidate_mem_insns[pm_cand_cnt++] = + mem_insn_id; + } + total_collect += num_invoked; + } + global_collect +=num_invoked; + } + } + if (pm_cand_cnt >= MAX_CANDIDATE_PM_INSNS) + printf ("Warning number of lines in TRACE_FILES exceed" + " the limit set by MAX_CANDIDATE_PM_INSNS.\n"); + fclose (profile_info_file); + } + } + + if (strides_pm_info_available) + { + int found_in_candidates = 0; + + if (announce_once) + { + printf ("PROFILE INFORMATION AVAILABLE AND WILL BE " + "USED.\n"); + announce_once = 0; + } + + for (pm_idx = 0 ; pm_idx < pm_cand_cnt ; pm_idx++) + { + if (candidate_mem_insns[pm_idx] == this_mem_insn) + { + found_in_candidates = 1; + break; + } + } + + if (((!info[i].write) && flag_prefetch_stores_only) + || (!found_in_candidates) + || LT_GDT (total_collect, global_collect) + || LT_ACCURACY_THRESHOLD (correct_count_insns[pm_idx], + pm_num_invoked[pm_idx], + total_collect)) + + /* not in candidate data base then ignore it */ + { + info[i].prefetch_in_loop = 0; + info[i].prefetch_before_loop = 0; + if (loop_dump_stream) + fprintf (loop_dump_stream, + "Prefetch: ignoring prefetch for insn %d, not " + "candidate as 
per pm_info", + this_mem_insn); + } + } + else + { + if (announce_once) + { + printf ("TRACE_FILE not found MEM PROFILING CODE WILL" + " BE GENERATED.\n"); + announce_once = 0; + } + } + } + } /* Determine how many iterations ahead to prefetch within the loop, based *************** emit_prefetch_instructions (struct loop *** 4154,4159 **** --- 4381,4390 ---- rtx before_insn = info[i].giv->insn; rtx prev_insn = PREV_INSN (info[i].giv->insn); rtx seq; + rtx mem_addr = NULL_RTX; + + if (flags & LOOP_PREFETCH_STRIDES) + mem_addr = copy_rtx (loc); /* We can save some effort by offsetting the address on architectures with offsettable memory references. */ *************** emit_prefetch_instructions (struct loop *** 4179,4184 **** --- 4410,4427 ---- end_sequence (); emit_insn_before (seq, before_insn); + if ((flags & LOOP_PREFETCH_STRIDES) + && (!strides_pm_info_available)) + { + emit_library_call_pm_collect_stats_before (my_fn_id, loc, seq, + before_insn, + info[i].giv->insn); + emit_library_call_pm_collect_stats_before (my_fn_id, mem_addr, + info[i].giv->insn, + before_insn, + info[i].giv->insn); + } + /* Check all insns emitted and record the new GIV information. */ insn = NEXT_INSN (prev_insn); *************** emit_prefetch_instructions (struct loop *** 4198,4209 **** to need. */ for (y = 0; y < info[i].prefetch_before_loop; y++) { rtx reg = gen_reg_rtx (Pmode); rtx loop_start = loop->start; rtx init_val = info[i].class->initial_value; rtx add_val = simplify_gen_binary (PLUS, Pmode, info[i].giv->add_val, ! GEN_INT (y * PREFETCH_BLOCK)); /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a non-constant INIT_VAL to have the same mode as REG, which --- 4441,4453 ---- to need. */ for (y = 0; y < info[i].prefetch_before_loop; y++) { + rtx emitted_pref; rtx reg = gen_reg_rtx (Pmode); rtx loop_start = loop->start; rtx init_val = info[i].class->initial_value; rtx add_val = simplify_gen_binary (PLUS, Pmode, info[i].giv->add_val, ! 
GEN_INT (y * PREFETCH_BLOCK)); /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a non-constant INIT_VAL to have the same mode as REG, which *************** emit_prefetch_instructions (struct loop *** 4221,4229 **** loop_iv_add_mult_emit_before (loop, init_val, info[i].giv->mult_val, add_val, reg, 0, loop_start); ! emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write), ! GEN_INT (3)), ! loop_start); } } } --- 4465,4489 ---- loop_iv_add_mult_emit_before (loop, init_val, info[i].giv->mult_val, add_val, reg, 0, loop_start); ! ! emitted_pref = emit_insn_before (gen_prefetch (reg, ! GEN_INT (info[i].write), ! GEN_INT (3)), ! loop_start); ! ! if ((flags & LOOP_PREFETCH_STRIDES) ! && (!strides_pm_info_available)) ! { ! emit_library_call_pm_collect_stats_before (my_fn_id, reg, ! emitted_pref, ! loop_start, ! info[i].giv->insn); ! emit_library_call_pm_collect_stats_before (my_fn_id, ! init_val, ! info[i].giv->insn, ! loop_start, ! info[i].giv->insn); ! } } } } *************** strength_reduce (struct loop *loop, int *** 5101,5108 **** loop_iterations (loop); #ifdef HAVE_prefetch ! if (flags & LOOP_PREFETCH) ! emit_prefetch_instructions (loop); #endif /* Now for each giv for which we still don't know whether or not it is --- 5361,5368 ---- loop_iterations (loop); #ifdef HAVE_prefetch ! if ((flags & LOOP_PREFETCH) || (flags & LOOP_PREFETCH_STRIDES)) ! emit_prefetch_instructions (loop, flags); #endif /* Now for each giv for which we still don't know whether or not it is Index: gcc/loop.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/loop.h,v retrieving revision 1.61.2.8 diff -c -3 -p -r1.61.2.8 loop.h *** gcc/loop.h 6 Oct 2003 17:36:36 -0000 1.61.2.8 --- gcc/loop.h 11 Dec 2003 23:37:35 -0000 *************** Software Foundation, 59 Temple Place - S *** 29,34 **** --- 29,35 ---- #define LOOP_BCT 2 #define LOOP_PREFETCH 4 #define LOOP_AUTO_UNROLL 8 + #define LOOP_PREFETCH_STRIDES 16 /* Get the loop info pointer of a loop. 
*/ #define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux) Index: gcc/opts.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/opts.c,v retrieving revision 1.31.2.21 diff -c -3 -p -r1.31.2.21 opts.c *** gcc/opts.c 6 Dec 2003 12:31:27 -0000 1.31.2.21 --- gcc/opts.c 11 Dec 2003 23:37:35 -0000 *************** common_handle_option (size_t scode, cons *** 1187,1192 **** --- 1187,1200 ---- flag_prefetch_loop_arrays = value; break; + case OPT_fprefetch_loop_strides: + flag_prefetch_loop_strides = value; + break; + + case OPT_fprefetch_stores_only: + flag_prefetch_stores_only = value; + break; + case OPT_fprofile: profile_flag = value; break; Index: gcc/toplev.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/toplev.c,v retrieving revision 1.654.2.82 diff -c -3 -p -r1.654.2.82 toplev.c *** gcc/toplev.c 6 Dec 2003 12:31:27 -0000 1.654.2.82 --- gcc/toplev.c 11 Dec 2003 23:37:35 -0000 *************** int flag_unswitch_loops; *** 561,566 **** --- 561,576 ---- int flag_prefetch_loop_arrays; + /* Nonzero enables feedback based prefetch optimization for strided + loads/stores in loops. */ + + int flag_prefetch_loop_strides; + + /* Nonzero enables feedback based prefetch optimization for strided + stores in loops. */ + + int flag_prefetch_stores_only; + /* Nonzero forces all invariant computations in loops to be moved outside the loop. 
*/ *************** static const lang_independent_options f_ *** 1083,1088 **** --- 1093,1100 ---- {"peel-loops", &flag_peel_loops, 1 }, {"unswitch-loops", &flag_unswitch_loops, 1 }, {"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1 }, + {"prefetch-loop-strides", &flag_prefetch_loop_strides, 1 }, + {"prefetch-stores-only", &flag_prefetch_stores_only, 1 }, {"move-all-movables", &flag_move_all_movables, 1 }, {"reduce-all-givs", &flag_reduce_all_givs, 1 }, {"writable-strings", &flag_writable_strings, 1 }, *************** rest_of_handle_gcse (tree decl, rtx insn *** 3016,3022 **** static void rest_of_handle_loop_optimize (tree decl, rtx insns) { ! int do_unroll, do_prefetch; timevar_push (TV_LOOP); delete_dead_jumptables (); --- 3028,3034 ---- static void rest_of_handle_loop_optimize (tree decl, rtx insns) { ! int do_unroll, do_prefetch, do_prefetch_strides; timevar_push (TV_LOOP); delete_dead_jumptables (); *************** rest_of_handle_loop_optimize (tree decl, *** 3031,3036 **** --- 3043,3050 ---- else do_unroll = flag_old_unroll_loops ? LOOP_UNROLL : LOOP_AUTO_UNROLL; do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0; + do_prefetch_strides = flag_prefetch_loop_strides ? LOOP_PREFETCH_STRIDES + : 0; if (flag_rerun_loop_opt) { *************** rest_of_handle_loop_optimize (tree decl, *** 3051,3057 **** reg_scan (insns, max_reg_num (), 1); } cleanup_barriers (); ! loop_optimize (insns, rtl_dump_file, do_unroll | LOOP_BCT | do_prefetch); /* Loop can create trivially dead instructions. */ delete_trivially_dead_insns (insns, max_reg_num ()); --- 3065,3072 ---- reg_scan (insns, max_reg_num (), 1); } cleanup_barriers (); ! loop_optimize (insns, rtl_dump_file, ! do_unroll | LOOP_BCT | do_prefetch | do_prefetch_strides); /* Loop can create trivially dead instructions. */ delete_trivially_dead_insns (insns, max_reg_num ());