/* Loop autoparallelization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"

/* This pass tries to distribute the iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if that is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let the omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
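
/* For illustration (a sketch of the intended effect, not the exact code
   this pass emits), a loop whose iterations are independent, such as

     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   is conceptually rewritten into the OpenMP form

     #pragma omp parallel num_threads (4)
     #pragma omp for schedule (static)
     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   after which the omp expansion machinery outlines the body into a new
   function executed by the created threads.  */
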
/*
  Reduction handling:
  Currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation is illustrated below by an example.


  parloop
  {
    int sum = 1;

    for (i = 0; i < N; i++)
      {
	x[i] = i + 3;
	sum += x[i];
      }
  }

  gimple-like code:
  header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


  exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


  after reduction transformation (only relevant parts):

  parloop
  {

  ....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result()  #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit_bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

  ...

  }

*/

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
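
/* For example, if four threads are requested, an innermost loop must run
   at least MIN_PER_THREAD * 4 = 400 iterations for the parallelized
   version to be entered; see the NITER < MIN_PER_THREAD * N_THREADS test
   described at gen_parallel_loop below.  (Loops with inner loops use a
   smaller per-thread minimum.)  */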

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* The reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* Code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting
				   value of the reduction variable when exiting
				   the loop.  */
  tree initial_value;		/* The initial value of the reduction var before
				   entering the loop.  */
  tree field;			/* The name of the field in the parloop data
				   structure intended for reduction.  */
  tree init;			/* Reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose
				   result will be passed to the atomic
				   operation.  Represents the local result each
				   thread computed for the reduction
				   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info value_type;
  typedef reduction_info compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const value_type *a, const compare_type *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const value_type *a)
{
  return a->reduc_version;
}

typedef hash_table <reduction_hasher> reduction_info_table_type;

/* Returns the reduction information for the reduction phi node PHI from
   REDUCTION_LIST, or NULL if PHI is not a known reduction (or the list
   is empty).  */

static struct reduction_info *
reduction_phi (reduction_info_table_type reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list.elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list.find (&tmpred);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt value_type;
  typedef name_to_copy_elt compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const value_type *a)
{
  return (hashval_t) a->version;
}

typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack *lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
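
/* As a small worked instance: with M = N = 2,
   MATRIX = { { 1, 1 }, { 0, 1 } } and VEC = (2, 3), the loops above
   compute DEST = (1*2 + 1*3, 0*2 + 1*3) = (5, 3).  */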

/* Return true if TRANS is a legal transformation matrix that respects
   the distance vectors in DEPENDENCE_RELATIONS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector."  S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
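
/* A worked instance of this test (illustrative only): loop_parallel_p
   below asks whether a single loop can be reversed, i.e. TRANS = [-1]
   with NB_LOOPS == 1.  A loop-carried dependence such as a[i+1] = a[i]
   has distance vector d = (1), and TRANS.d = (-1) is not
   lexicographically positive, so the reversal -- and therefore the
   parallelization -- is rejected.  If all dependences are known and none
   is loop-carried, no distance vector fails the test and the
   transformation is accepted.  */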

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack *parloop_obstack)
{
  vec<loop_p> loop_nest;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  datarefs.create (10);
  dependence_relations.create (10 * 10);
  loop_nest.create (3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  loop_nest.release ();
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  int_tree_map **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      name = make_temp_ssa_name (TREE_TYPE (addr), NULL,
				 get_name (TREE_OPERAND
					     (TREE_OPERAND (*var_p, 0), 0)));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = (*dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
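
/* To illustrate the effect (a sketch with invented names): for a local
   array "int x[10]" accessed as x[i] in the region, the base decl is
   rewritten so that the access becomes (*x.addr)[i], where the single
   assignment

     x.addr = &x;

   is emitted once on the ENTRY edge and then reused by every access and
   address computation based on x.  */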

/* Callback for htab_traverse.  Create the initialization statement
   for the reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking the address of a local variable -- these are moved out of the
      region (and a temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  vec<basic_block> body;
  body.create (3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address;
  decl_address.create (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  decl_address.dispose ();
  body.release ();
}
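
/* Schematically (a sketch with invented names), for a local variable
   "int n" used inside the region, the statement "n.addr = &n;" is
   emitted once on the ENTRY edge, a use "p = &n;" inside the region
   becomes "p = n.addr;", and a dereference "... = n;" becomes
   "... = *n.addr;".  The region then no longer refers to the local decl
   directly and can later be outlined into a separate function.  */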

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored in NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies, bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies.find_slot_with_hash (&elt, idx,
					  copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   the region) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}

/* Like separate_decls_in_region_stmt, but for debug bind and debug source
   bind statements: rewrites the bound variable to its duplicate recorded
   in DECL_COPIES and the used ssa names to their duplicates recorded in
   NAME_COPIES.  Returns true if STMT should be removed instead (because
   no duplicate was recorded for the variable it binds); if a used ssa
   name has no duplicate, the bound value is merely reset.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type name_copies,
				int_tree_htab_type decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to an ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed in the loop),
     and another coming from a block preceding the loop,
     when no iterations are executed (the initial value
     should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure that the
   shared data is stored to and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  reduction_list
    .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}

/* Callback for htab_traverse.  Store the initial value of the reduction
   variable (the value it had before entering the loop, kept in the
   INITIAL_VALUE field) into the reduction field of the shared structure.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates a load to a field of LOAD in LOAD_BB
   and a store to a field of STORE in STORE_BB for the ssa name and its
   duplicate specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes) to a structure created for this
   purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies;
  name_copies.create (10);
  int_tree_htab_type decl_copies;
  decl_copies.create (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  vec<basic_block> body;
  body.create (3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  body.release ();

  if (name_copies.elements () == 0 && reduction_list.elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list.traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
		  (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  reduction_list
	    .traverse <struct clsn_data *, create_stores_for_reduction>
		      (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  decl_copies.dispose ();
  name_copies.dispose ();
}

/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

bool
parallelized_function_p (tree fn)
{
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}

/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that the latch of the loop is empty and
   immediately follows the loop exit.  In this case, it would be better not
   to copy the body of the loop, but only move the entry of the loop directly
   before the exit check and increase the number of iterations of the loop
   by one.  This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of a reduction.  If it is,
	 keep the phi at the reduction's KEEP_RES field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list.elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to the number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name),
						  nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
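
/* As a sketch (not the exact CFG this produces): a loop of the form

     loop:
       body;
       i++;
       if (i < nit) goto loop;

   becomes, roughly,

     loop:
       if (i >= nit) goto exit;
       body;
       i++;
       goto loop;
     exit:
       ... duplicated part of the last iteration ...

   so the exit test is the first statement of the loop body, as the
   GIMPLE_OMP_FOR expansion expects, and the part of the final iteration
   that the new test disables is duplicated after the loop.  */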

/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing the GIMPLE_OMP_PARALLEL statement.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above, the dominance info is hosed.  Recompute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
1722
1723 /* Generates code to execute the iterations of LOOP in N_THREADS
1724 threads in parallel.
1725
1726 NITER describes number of iterations of LOOP.
1727 REDUCTION_LIST describes the reductions existent in the LOOP. */
1728
1729 static void
1730 gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
1731 unsigned n_threads, struct tree_niter_desc *niter)
1732 {
1733 loop_iterator li;
1734 tree many_iterations_cond, type, nit;
1735 tree arg_struct, new_arg_struct;
1736 gimple_seq stmts;
1737 basic_block parallel_head;
1738 edge entry, exit;
1739 struct clsn_data clsn_data;
1740 unsigned prob;
1741 location_t loc;
1742 gimple cond_stmt;
1743 unsigned int m_p_thread=2;
1744
  /* From

     ---------------------------------------------------------------------
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
         || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in the body of the loop
     to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }

     end:

  */
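
  /* For example, with N_THREADS == 4 and MIN_PER_THREAD == 100 (a
     hypothetical value; the real one is defined earlier in this file),
     a loop with NITER == 1000 takes the parallel path, while one with
     NITER == 300 branches to the original version, since 300 < 100 * 4.  */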

  /* Create two versions of the loop: in the old one, we know that the
     number of iterations is large enough, and we will transform it into
     the loop that will be split to loop_fn; the new one will be used for
     the remaining iterations.  */


  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

       for (i = 0; i < n; ++i)
         for (j = 0; j < m; ++j)
           ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
                              NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

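  /* For an outer loop (LOOP->inner is non-NULL), NITER only counts the
     iterations of LOOP itself rather than of the whole nest (see the
     comment above), so settle for just two iterations per thread.  */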
  if (loop->inner)
    m_p_thread = 2;
  else
    m_p_thread = MIN_PER_THREAD;

  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
                 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
                   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
        = force_gimple_operand (many_iterations_cond, &stmts,
                                true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot, so the branch into the
     parallelized version is predicted taken with probability 4/5.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
                prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (reduction_list.elements () > 0)
    reduction_list.traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a
     structure and back, and create separate decls for the variables used
     in the loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
                            &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
                                        new_arg_struct, n_threads, loc);
  if (reduction_list.elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
  /* Expand the parallel constructs.  We do it directly here instead of
     running a separate expand_omp pass, since it is more efficient, and
     less likely to cause trouble with further analyses not being able to
     deal with the OMP trees.  */

  omp_expand_local (parallel_head);
}

/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gimple_stmt_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
        goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, and insert it into REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type reduction_list,
                     gimple reduc_stmt, gimple phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
               "Detected reduction.  Reduction stmt is:\n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = reduction_list.find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}

/* Callback for reduction_info_table_type::traverse.  Sets gimple_uid of
   reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}

/* Detect all reductions in the LOOP and insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
        continue;

      if (!simple_iv (loop, loop, res, &iv, true)
          && simple_loop_info)
        {
          gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
                                                           phi, true,
                                                           &double_reduc);
          if (reduc_stmt && !double_reduc)
            build_new_reduction (reduction_list, reduc_stmt, phi);
        }
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
}

/* Try to initialize NITER for the code generation part.  */

static bool
try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know the number of iterations, and there must be no uses
     of values defined inside the loop outside of it, unless the values
     are loop invariants.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}

/* Try to initialize REDUCTION_LIST for the code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
                           reduction_info_table_type reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "phi is ");
              print_gimple_stmt (dump_file, phi, 0, 0);
              fprintf (dump_file, "arg of phi to exit: value ");
              print_generic_expr (dump_file, val, 0);
              fprintf (dump_file, " used outside loop\n");
              fprintf (dump_file,
                       "  checking if it is part of a reduction pattern:\n");
            }
          if (reduction_list.elements () == 0)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not part of a reduction.\n");
              return false;
            }
          reduc_phi = NULL;
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
            {
              if (!gimple_debug_bind_p (USE_STMT (use_p))
                  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
                {
                  reduc_phi = USE_STMT (use_p);
                  break;
                }
            }
          red = reduction_phi (reduction_list, reduc_phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not part of a reduction.\n");
              return false;
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "reduction phi is ");
              print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
              fprintf (dump_file, "reduction stmt is ");
              print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
            }
        }
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
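  /* For example (hypothetical GIMPLE), a header phi whose update is
     t_2 = t_1 * 2 + 1 is neither an affine induction variable nor a
     reduction recognized above, so it would make the check below fail.  */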
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
        {
          struct reduction_info *red;

          red = reduction_phi (reduction_list, phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: scalar dependency between iterations\n");
              return false;
            }
        }
    }

  return true;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  reduction_info_table_type reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_list.create (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
          if (loop->inner)
            fprintf (dump_file, "loop %d is not innermost\n", loop->num);
          else
            fprintf (dump_file, "loop %d is innermost\n", loop->num);
        }

      /* If autopar is used together with the graphite passes, reuse the
         dependency information that graphite has already computed.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop is not parallel according to graphite\n");
          continue;
        }

      if (!single_dom_exit (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "loop does not have a single dominating exit\n");
          continue;
        }

      if (/* And of course, the loop must be parallelizable.  */
          !can_duplicate_loop_p (loop)
          || loop_has_blocks_with_irreducible_flag (loop)
          || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
          /* FIXME: the check for vector phi nodes could be removed.  */
          || loop_has_vector_phi_nodes (loop))
        continue;

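      /* For example, assuming MIN_PER_THREAD is 100 and n_threads is 4,
         a loop whose estimate does not exceed 4 * 100 = 400 is skipped
         below as not worth the thread-creation overhead.  */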
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
        estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
         count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
          && ((estimated != -1
               && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
              /* Do not bother with loops in cold areas.  */
              || optimize_loop_nest_for_size_p (loop)))
        continue;

      if (!try_get_loop_niter (loop, &niter_desc))
        continue;

      if (!try_create_reduction_list (loop, reduction_list))
        continue;

      if (!flag_loop_parallelize_all
          && !loop_parallel_p (loop, &parloop_obstack))
        continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop->inner)
            fprintf (dump_file, "parallelizing outer loop %d\n",
                     loop->header->index);
          else
            fprintf (dump_file, "parallelizing inner loop %d\n",
                     loop->header->index);
          loop_loc = find_loop_location (loop);
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\nloop at %s:%d: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
        }
      gen_parallel_loop (loop, reduction_list,
                         n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  reduction_list.dispose ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
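
/* For illustration, a candidate this pass can handle (a hypothetical
   example, compiled with -ftree-parallelize-loops=4):

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   It has a single exit, independent iterations and no reductions, so the
   checks above succeed and gen_parallel_loop splits it into a loop
   function run under GIMPLE_OMP_PARALLEL / GIMPLE_OMP_FOR.  */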

#include "gt-tree-parloops.h"