]>
Commit | Line | Data |
---|---|---|
20f06221 | 1 | /* Analysis Utilities for Loop Vectorization. |
ebb07520 | 2 | Copyright (C) 2006, 2007 Free Software Foundation, Inc. |
20f06221 DN |
3 | Contributed by Dorit Nuzman <dorit@il.ibm.com> |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9dcd6f09 | 9 | Software Foundation; either version 3, or (at your option) any later |
20f06221 DN |
10 | version. |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ | |
20f06221 DN |
20 | |
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tm.h" | |
25 | #include "ggc.h" | |
26 | #include "tree.h" | |
27 | ||
28 | #include "target.h" | |
29 | #include "basic-block.h" | |
30 | #include "diagnostic.h" | |
31 | #include "tree-flow.h" | |
32 | #include "tree-dump.h" | |
33 | #include "timevar.h" | |
34 | #include "cfgloop.h" | |
35 | #include "expr.h" | |
36 | #include "optabs.h" | |
37 | #include "params.h" | |
38 | #include "tree-data-ref.h" | |
39 | #include "tree-vectorizer.h" | |
40 | #include "recog.h" | |
41 | #include "toplev.h" | |
42 | ||
c0220ea4 | 43 | /* Function prototypes */ |
20f06221 DN |
44 | static void vect_pattern_recog_1 |
45 | (tree (* ) (tree, tree *, tree *), block_stmt_iterator); | |
46 | static bool widened_name_p (tree, tree, tree *, tree *); | |
47 | ||
48 | /* Pattern recognition functions */ | |
49 | static tree vect_recog_widen_sum_pattern (tree, tree *, tree *); | |
50 | static tree vect_recog_widen_mult_pattern (tree, tree *, tree *); | |
51 | static tree vect_recog_dot_prod_pattern (tree, tree *, tree *); | |
0b2229b0 | 52 | static tree vect_recog_pow_pattern (tree, tree *, tree *); |
20f06221 DN |
53 | static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { |
54 | vect_recog_widen_mult_pattern, | |
55 | vect_recog_widen_sum_pattern, | |
0b2229b0 RG |
56 | vect_recog_dot_prod_pattern, |
57 | vect_recog_pow_pattern}; | |
20f06221 DN |
58 | |
59 | ||
60 | /* Function widened_name_p | |
61 | ||
62 | Check whether NAME, an ssa-name used in USE_STMT, | |
63 | is a result of a type-promotion, such that: | |
64 | DEF_STMT: NAME = NOP (name0) | |
65 | where the type of name0 (HALF_TYPE) is smaller than the type of NAME. | |
66 | */ | |
67 | ||
68 | static bool | |
69 | widened_name_p (tree name, tree use_stmt, tree *half_type, tree *def_stmt) | |
70 | { | |
71 | tree dummy; | |
72 | loop_vec_info loop_vinfo; | |
73 | stmt_vec_info stmt_vinfo; | |
74 | tree expr; | |
75 | tree type = TREE_TYPE (name); | |
76 | tree oprnd0; | |
77 | enum vect_def_type dt; | |
78 | tree def; | |
79 | ||
80 | stmt_vinfo = vinfo_for_stmt (use_stmt); | |
81 | loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | |
82 | ||
83 | if (!vect_is_simple_use (name, loop_vinfo, def_stmt, &def, &dt)) | |
84 | return false; | |
85 | ||
86 | if (dt != vect_loop_def | |
87 | && dt != vect_invariant_def && dt != vect_constant_def) | |
88 | return false; | |
89 | ||
90 | if (! *def_stmt) | |
91 | return false; | |
92 | ||
07beea0d | 93 | if (TREE_CODE (*def_stmt) != GIMPLE_MODIFY_STMT) |
20f06221 DN |
94 | return false; |
95 | ||
07beea0d | 96 | expr = GIMPLE_STMT_OPERAND (*def_stmt, 1); |
20f06221 DN |
97 | if (TREE_CODE (expr) != NOP_EXPR) |
98 | return false; | |
99 | ||
100 | oprnd0 = TREE_OPERAND (expr, 0); | |
101 | ||
102 | *half_type = TREE_TYPE (oprnd0); | |
103 | if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) | |
104 | || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) | |
105 | || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) | |
106 | return false; | |
107 | ||
108 | if (!vect_is_simple_use (oprnd0, loop_vinfo, &dummy, &dummy, &dt)) | |
109 | return false; | |
110 | ||
20f06221 DN |
111 | return true; |
112 | } | |
113 | ||
114 | ||
115 | /* Function vect_recog_dot_prod_pattern | |
116 | ||
117 | Try to find the following pattern: | |
118 | ||
119 | type x_t, y_t; | |
120 | TYPE1 prod; | |
121 | TYPE2 sum = init; | |
122 | loop: | |
123 | sum_0 = phi <init, sum_1> | |
124 | S1 x_t = ... | |
125 | S2 y_t = ... | |
126 | S3 x_T = (TYPE1) x_t; | |
127 | S4 y_T = (TYPE1) y_t; | |
128 | S5 prod = x_T * y_T; | |
129 | [S6 prod = (TYPE2) prod; #optional] | |
130 | S7 sum_1 = prod + sum_0; | |
131 | ||
132 | where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the | |
c0220ea4 | 133 | same size of 'TYPE1' or bigger. This is a special case of a reduction |
20f06221 DN |
134 | computation. |
135 | ||
136 | Input: | |
137 | ||
138 | * LAST_STMT: A stmt from which the pattern search begins. In the example, | |
139 | when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be | |
140 | detected. | |
141 | ||
142 | Output: | |
143 | ||
144 | * TYPE_IN: The type of the input arguments to the pattern. | |
145 | ||
146 | * TYPE_OUT: The type of the output of this pattern. | |
147 | ||
148 | * Return value: A new stmt that will be used to replace the sequence of | |
149 | stmts that constitute the pattern. In this case it will be: | |
150 | WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> | |
d29de1bf DN |
151 | |
152 | Note: The dot-prod idiom is a widening reduction pattern that is | |
153 | vectorized without preserving all the intermediate results. It | |
154 | produces only N/2 (widened) results (by summing up pairs of | |
155 | intermediate results) rather than all N results. Therefore, we | |
156 | cannot allow this pattern when we want to get all the results and in | |
157 | the correct order (as is the case when this computation is in an | |
158 | inner-loop nested in an outer-loop that us being vectorized). */ | |
20f06221 DN |
159 | |
160 | static tree | |
161 | vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out) | |
162 | { | |
163 | tree stmt, expr; | |
164 | tree oprnd0, oprnd1; | |
165 | tree oprnd00, oprnd01; | |
166 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | |
167 | tree type, half_type; | |
168 | tree pattern_expr; | |
169 | tree prod_type; | |
d29de1bf DN |
170 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); |
171 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | |
20f06221 | 172 | |
07beea0d | 173 | if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT) |
20f06221 DN |
174 | return NULL; |
175 | ||
07beea0d | 176 | expr = GIMPLE_STMT_OPERAND (last_stmt, 1); |
20f06221 DN |
177 | type = TREE_TYPE (expr); |
178 | ||
179 | /* Look for the following pattern | |
180 | DX = (TYPE1) X; | |
181 | DY = (TYPE1) Y; | |
182 | DPROD = DX * DY; | |
183 | DDPROD = (TYPE2) DPROD; | |
184 | sum_1 = DDPROD + sum_0; | |
185 | In which | |
186 | - DX is double the size of X | |
187 | - DY is double the size of Y | |
188 | - DX, DY, DPROD all have the same type | |
189 | - sum is the same size of DPROD or bigger | |
190 | - sum has been recognized as a reduction variable. | |
191 | ||
192 | This is equivalent to: | |
193 | DPROD = X w* Y; #widen mult | |
194 | sum_1 = DPROD w+ sum_0; #widen summation | |
195 | or | |
196 | DPROD = X w* Y; #widen mult | |
197 | sum_1 = DPROD + sum_0; #summation | |
198 | */ | |
199 | ||
200 | /* Starting from LAST_STMT, follow the defs of its uses in search | |
201 | of the above pattern. */ | |
202 | ||
203 | if (TREE_CODE (expr) != PLUS_EXPR) | |
204 | return NULL; | |
205 | ||
206 | if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) | |
207 | { | |
208 | /* Has been detected as widening-summation? */ | |
209 | ||
210 | stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); | |
ce133c3f | 211 | expr = GIMPLE_STMT_OPERAND (stmt, 1); |
20f06221 DN |
212 | type = TREE_TYPE (expr); |
213 | if (TREE_CODE (expr) != WIDEN_SUM_EXPR) | |
214 | return NULL; | |
215 | oprnd0 = TREE_OPERAND (expr, 0); | |
216 | oprnd1 = TREE_OPERAND (expr, 1); | |
217 | half_type = TREE_TYPE (oprnd0); | |
218 | } | |
219 | else | |
220 | { | |
221 | tree def_stmt; | |
222 | ||
223 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | |
224 | return NULL; | |
225 | oprnd0 = TREE_OPERAND (expr, 0); | |
226 | oprnd1 = TREE_OPERAND (expr, 1); | |
227 | if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type) | |
228 | || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type)) | |
229 | return NULL; | |
230 | stmt = last_stmt; | |
231 | ||
232 | if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) | |
233 | { | |
234 | stmt = def_stmt; | |
07beea0d | 235 | expr = GIMPLE_STMT_OPERAND (stmt, 1); |
20f06221 DN |
236 | oprnd0 = TREE_OPERAND (expr, 0); |
237 | } | |
238 | else | |
239 | half_type = type; | |
240 | } | |
241 | ||
242 | /* So far so good. Since last_stmt was detected as a (summation) reduction, | |
243 | we know that oprnd1 is the reduction variable (defined by a loop-header | |
244 | phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. | |
245 | Left to check that oprnd0 is defined by a (widen_)mult_expr */ | |
246 | ||
247 | prod_type = half_type; | |
248 | stmt = SSA_NAME_DEF_STMT (oprnd0); | |
8665227f RG |
249 | /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi |
250 | inside the loop (in case we are analyzing an outer-loop). */ | |
251 | if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) | |
252 | return NULL; | |
20f06221 DN |
253 | stmt_vinfo = vinfo_for_stmt (stmt); |
254 | gcc_assert (stmt_vinfo); | |
b3130586 DN |
255 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_loop_def) |
256 | return NULL; | |
07beea0d | 257 | expr = GIMPLE_STMT_OPERAND (stmt, 1); |
20f06221 DN |
258 | if (TREE_CODE (expr) != MULT_EXPR) |
259 | return NULL; | |
260 | if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) | |
261 | { | |
262 | /* Has been detected as a widening multiplication? */ | |
263 | ||
264 | stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); | |
07beea0d | 265 | expr = GIMPLE_STMT_OPERAND (stmt, 1); |
20f06221 DN |
266 | if (TREE_CODE (expr) != WIDEN_MULT_EXPR) |
267 | return NULL; | |
268 | stmt_vinfo = vinfo_for_stmt (stmt); | |
269 | gcc_assert (stmt_vinfo); | |
270 | gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_loop_def); | |
271 | oprnd00 = TREE_OPERAND (expr, 0); | |
272 | oprnd01 = TREE_OPERAND (expr, 1); | |
273 | } | |
274 | else | |
275 | { | |
276 | tree half_type0, half_type1; | |
277 | tree def_stmt; | |
278 | tree oprnd0, oprnd1; | |
279 | ||
280 | oprnd0 = TREE_OPERAND (expr, 0); | |
281 | oprnd1 = TREE_OPERAND (expr, 1); | |
282 | if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) | |
283 | != TYPE_MAIN_VARIANT (prod_type) | |
284 | || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) | |
285 | != TYPE_MAIN_VARIANT (prod_type)) | |
286 | return NULL; | |
287 | if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) | |
288 | return NULL; | |
07beea0d | 289 | oprnd00 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt, 1), 0); |
20f06221 DN |
290 | if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) |
291 | return NULL; | |
07beea0d | 292 | oprnd01 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt, 1), 0); |
20f06221 DN |
293 | if (TYPE_MAIN_VARIANT (half_type0) != TYPE_MAIN_VARIANT (half_type1)) |
294 | return NULL; | |
295 | if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) | |
296 | return NULL; | |
297 | } | |
298 | ||
299 | half_type = TREE_TYPE (oprnd00); | |
300 | *type_in = half_type; | |
301 | *type_out = type; | |
302 | ||
303 | /* Pattern detected. Create a stmt to be used to replace the pattern: */ | |
304 | pattern_expr = build3 (DOT_PROD_EXPR, type, oprnd00, oprnd01, oprnd1); | |
305 | if (vect_print_dump_info (REPORT_DETAILS)) | |
306 | { | |
307 | fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: "); | |
308 | print_generic_expr (vect_dump, pattern_expr, TDF_SLIM); | |
309 | } | |
d29de1bf DN |
310 | |
311 | /* We don't allow changing the order of the computation in the inner-loop | |
312 | when doing outer-loop vectorization. */ | |
313 | if (nested_in_vect_loop_p (loop, last_stmt)) | |
314 | { | |
315 | if (vect_print_dump_info (REPORT_DETAILS)) | |
316 | fprintf (vect_dump, "vect_recog_dot_prod_pattern: not allowed."); | |
317 | return NULL; | |
318 | } | |
319 | ||
20f06221 DN |
320 | return pattern_expr; |
321 | } | |
322 | ||
323 | ||
324 | /* Function vect_recog_widen_mult_pattern | |
325 | ||
326 | Try to find the following pattern: | |
327 | ||
328 | type a_t, b_t; | |
329 | TYPE a_T, b_T, prod_T; | |
330 | ||
331 | S1 a_t = ; | |
332 | S2 b_t = ; | |
333 | S3 a_T = (TYPE) a_t; | |
334 | S4 b_T = (TYPE) b_t; | |
335 | S5 prod_T = a_T * b_T; | |
336 | ||
337 | where type 'TYPE' is at least double the size of type 'type'. | |
338 | ||
339 | Input: | |
340 | ||
341 | * LAST_STMT: A stmt from which the pattern search begins. In the example, | |
342 | when this function is called with S5, the pattern {S3,S4,S5} is be detected. | |
343 | ||
344 | Output: | |
345 | ||
346 | * TYPE_IN: The type of the input arguments to the pattern. | |
347 | ||
348 | * TYPE_OUT: The type of the output of this pattern. | |
349 | ||
350 | * Return value: A new stmt that will be used to replace the sequence of | |
351 | stmts that constitute the pattern. In this case it will be: | |
352 | WIDEN_MULT <a_t, b_t> | |
353 | */ | |
354 | ||
355 | static tree | |
89d67cca DN |
356 | vect_recog_widen_mult_pattern (tree last_stmt, |
357 | tree *type_in, | |
358 | tree *type_out) | |
20f06221 | 359 | { |
89d67cca DN |
360 | tree expr; |
361 | tree def_stmt0, def_stmt1; | |
362 | tree oprnd0, oprnd1; | |
363 | tree type, half_type0, half_type1; | |
364 | tree pattern_expr; | |
365 | tree vectype; | |
366 | tree dummy; | |
367 | enum tree_code dummy_code; | |
368 | ||
07beea0d | 369 | if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT) |
89d67cca DN |
370 | return NULL; |
371 | ||
07beea0d | 372 | expr = GIMPLE_STMT_OPERAND (last_stmt, 1); |
89d67cca DN |
373 | type = TREE_TYPE (expr); |
374 | ||
375 | /* Starting from LAST_STMT, follow the defs of its uses in search | |
376 | of the above pattern. */ | |
377 | ||
378 | if (TREE_CODE (expr) != MULT_EXPR) | |
379 | return NULL; | |
380 | ||
381 | oprnd0 = TREE_OPERAND (expr, 0); | |
382 | oprnd1 = TREE_OPERAND (expr, 1); | |
383 | if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type) | |
384 | || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type)) | |
385 | return NULL; | |
386 | ||
387 | /* Check argument 0 */ | |
388 | if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) | |
389 | return NULL; | |
07beea0d | 390 | oprnd0 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt0, 1), 0); |
89d67cca DN |
391 | |
392 | /* Check argument 1 */ | |
393 | if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) | |
394 | return NULL; | |
07beea0d | 395 | oprnd1 = TREE_OPERAND (GIMPLE_STMT_OPERAND (def_stmt1, 1), 0); |
89d67cca DN |
396 | |
397 | if (TYPE_MAIN_VARIANT (half_type0) != TYPE_MAIN_VARIANT (half_type1)) | |
398 | return NULL; | |
399 | ||
400 | /* Pattern detected. */ | |
401 | if (vect_print_dump_info (REPORT_DETAILS)) | |
402 | fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: "); | |
403 | ||
404 | /* Check target support */ | |
405 | vectype = get_vectype_for_scalar_type (half_type0); | |
03d3e953 IR |
406 | if (!vectype |
407 | || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype, | |
89d67cca DN |
408 | &dummy, &dummy, &dummy_code, |
409 | &dummy_code)) | |
410 | return NULL; | |
411 | ||
412 | *type_in = vectype; | |
413 | *type_out = NULL_TREE; | |
414 | ||
415 | /* Pattern supported. Create a stmt to be used to replace the pattern: */ | |
416 | pattern_expr = build2 (WIDEN_MULT_EXPR, type, oprnd0, oprnd1); | |
417 | if (vect_print_dump_info (REPORT_DETAILS)) | |
418 | print_generic_expr (vect_dump, pattern_expr, TDF_SLIM); | |
419 | return pattern_expr; | |
20f06221 DN |
420 | } |
421 | ||
422 | ||
0b2229b0 RG |
423 | /* Function vect_recog_pow_pattern |
424 | ||
425 | Try to find the following pattern: | |
426 | ||
427 | x = POW (y, N); | |
428 | ||
429 | with POW being one of pow, powf, powi, powif and N being | |
430 | either 2 or 0.5. | |
431 | ||
432 | Input: | |
433 | ||
434 | * LAST_STMT: A stmt from which the pattern search begins. | |
435 | ||
436 | Output: | |
437 | ||
438 | * TYPE_IN: The type of the input arguments to the pattern. | |
439 | ||
440 | * TYPE_OUT: The type of the output of this pattern. | |
441 | ||
442 | * Return value: A new stmt that will be used to replace the sequence of | |
443 | stmts that constitute the pattern. In this case it will be: | |
444 | x * x | |
445 | or | |
446 | sqrt (x) | |
447 | */ | |
448 | ||
449 | static tree | |
450 | vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out) | |
451 | { | |
452 | tree expr; | |
453 | tree type; | |
5039610b | 454 | tree fn, base, exp; |
0b2229b0 | 455 | |
07beea0d | 456 | if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT) |
0b2229b0 RG |
457 | return NULL; |
458 | ||
07beea0d | 459 | expr = GIMPLE_STMT_OPERAND (last_stmt, 1); |
0b2229b0 RG |
460 | type = TREE_TYPE (expr); |
461 | ||
462 | if (TREE_CODE (expr) != CALL_EXPR) | |
463 | return NULL_TREE; | |
464 | ||
465 | fn = get_callee_fndecl (expr); | |
0b2229b0 RG |
466 | switch (DECL_FUNCTION_CODE (fn)) |
467 | { | |
468 | case BUILT_IN_POWIF: | |
469 | case BUILT_IN_POWI: | |
470 | case BUILT_IN_POWF: | |
471 | case BUILT_IN_POW: | |
5039610b SL |
472 | base = CALL_EXPR_ARG (expr, 0); |
473 | exp = CALL_EXPR_ARG (expr, 1); | |
0b2229b0 RG |
474 | if (TREE_CODE (exp) != REAL_CST |
475 | && TREE_CODE (exp) != INTEGER_CST) | |
476 | return NULL_TREE; | |
477 | break; | |
478 | ||
479 | default:; | |
480 | return NULL_TREE; | |
481 | } | |
482 | ||
483 | /* We now have a pow or powi builtin function call with a constant | |
484 | exponent. */ | |
485 | ||
0b2229b0 RG |
486 | *type_out = NULL_TREE; |
487 | ||
488 | /* Catch squaring. */ | |
489 | if ((host_integerp (exp, 0) | |
490 | && tree_low_cst (exp, 0) == 2) | |
491 | || (TREE_CODE (exp) == REAL_CST | |
492 | && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2))) | |
c6b1b49b RG |
493 | { |
494 | *type_in = TREE_TYPE (base); | |
495 | return build2 (MULT_EXPR, TREE_TYPE (base), base, base); | |
496 | } | |
0b2229b0 RG |
497 | |
498 | /* Catch square root. */ | |
499 | if (TREE_CODE (exp) == REAL_CST | |
500 | && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf)) | |
501 | { | |
502 | tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT); | |
c6b1b49b RG |
503 | *type_in = get_vectype_for_scalar_type (TREE_TYPE (base)); |
504 | if (*type_in) | |
505 | { | |
5039610b | 506 | newfn = build_call_expr (newfn, 1, base); |
b95becfc | 507 | if (vectorizable_function (newfn, *type_in, *type_in) != NULL_TREE) |
c6b1b49b RG |
508 | return newfn; |
509 | } | |
0b2229b0 RG |
510 | } |
511 | ||
512 | return NULL_TREE; | |
513 | } | |
514 | ||
515 | ||
20f06221 DN |
516 | /* Function vect_recog_widen_sum_pattern |
517 | ||
518 | Try to find the following pattern: | |
519 | ||
520 | type x_t; | |
521 | TYPE x_T, sum = init; | |
522 | loop: | |
523 | sum_0 = phi <init, sum_1> | |
524 | S1 x_t = *p; | |
525 | S2 x_T = (TYPE) x_t; | |
526 | S3 sum_1 = x_T + sum_0; | |
527 | ||
528 | where type 'TYPE' is at least double the size of type 'type', i.e - we're | |
529 | summing elements of type 'type' into an accumulator of type 'TYPE'. This is | |
917f1b7e | 530 | a special case of a reduction computation. |
20f06221 DN |
531 | |
532 | Input: | |
533 | ||
534 | * LAST_STMT: A stmt from which the pattern search begins. In the example, | |
535 | when this function is called with S3, the pattern {S2,S3} will be detected. | |
536 | ||
537 | Output: | |
538 | ||
539 | * TYPE_IN: The type of the input arguments to the pattern. | |
540 | ||
541 | * TYPE_OUT: The type of the output of this pattern. | |
542 | ||
543 | * Return value: A new stmt that will be used to replace the sequence of | |
544 | stmts that constitute the pattern. In this case it will be: | |
545 | WIDEN_SUM <x_t, sum_0> | |
d29de1bf | 546 | |
84fbffb2 | 547 | Note: The widening-sum idiom is a widening reduction pattern that is |
d29de1bf DN |
548 | vectorized without preserving all the intermediate results. It |
549 | produces only N/2 (widened) results (by summing up pairs of | |
550 | intermediate results) rather than all N results. Therefore, we | |
551 | cannot allow this pattern when we want to get all the results and in | |
552 | the correct order (as is the case when this computation is in an | |
553 | inner-loop nested in an outer-loop that us being vectorized). */ | |
20f06221 DN |
554 | |
555 | static tree | |
556 | vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out) | |
557 | { | |
558 | tree stmt, expr; | |
559 | tree oprnd0, oprnd1; | |
560 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); | |
561 | tree type, half_type; | |
562 | tree pattern_expr; | |
d29de1bf DN |
563 | loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); |
564 | struct loop *loop = LOOP_VINFO_LOOP (loop_info); | |
20f06221 | 565 | |
07beea0d | 566 | if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT) |
20f06221 DN |
567 | return NULL; |
568 | ||
07beea0d | 569 | expr = GIMPLE_STMT_OPERAND (last_stmt, 1); |
20f06221 DN |
570 | type = TREE_TYPE (expr); |
571 | ||
572 | /* Look for the following pattern | |
573 | DX = (TYPE) X; | |
574 | sum_1 = DX + sum_0; | |
575 | In which DX is at least double the size of X, and sum_1 has been | |
576 | recognized as a reduction variable. | |
577 | */ | |
578 | ||
579 | /* Starting from LAST_STMT, follow the defs of its uses in search | |
580 | of the above pattern. */ | |
581 | ||
582 | if (TREE_CODE (expr) != PLUS_EXPR) | |
583 | return NULL; | |
584 | ||
585 | if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) | |
586 | return NULL; | |
587 | ||
588 | oprnd0 = TREE_OPERAND (expr, 0); | |
589 | oprnd1 = TREE_OPERAND (expr, 1); | |
590 | if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type) | |
591 | || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type)) | |
592 | return NULL; | |
593 | ||
594 | /* So far so good. Since last_stmt was detected as a (summation) reduction, | |
595 | we know that oprnd1 is the reduction variable (defined by a loop-header | |
596 | phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. | |
597 | Left to check that oprnd0 is defined by a cast from type 'type' to type | |
598 | 'TYPE'. */ | |
599 | ||
600 | if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) | |
601 | return NULL; | |
602 | ||
07beea0d | 603 | oprnd0 = TREE_OPERAND (GIMPLE_STMT_OPERAND (stmt, 1), 0); |
20f06221 DN |
604 | *type_in = half_type; |
605 | *type_out = type; | |
606 | ||
607 | /* Pattern detected. Create a stmt to be used to replace the pattern: */ | |
608 | pattern_expr = build2 (WIDEN_SUM_EXPR, type, oprnd0, oprnd1); | |
609 | if (vect_print_dump_info (REPORT_DETAILS)) | |
610 | { | |
611 | fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: "); | |
612 | print_generic_expr (vect_dump, pattern_expr, TDF_SLIM); | |
613 | } | |
d29de1bf DN |
614 | |
615 | /* We don't allow changing the order of the computation in the inner-loop | |
616 | when doing outer-loop vectorization. */ | |
617 | if (nested_in_vect_loop_p (loop, last_stmt)) | |
618 | { | |
619 | if (vect_print_dump_info (REPORT_DETAILS)) | |
620 | fprintf (vect_dump, "vect_recog_widen_sum_pattern: not allowed."); | |
621 | return NULL; | |
622 | } | |
623 | ||
20f06221 DN |
624 | return pattern_expr; |
625 | } | |
626 | ||
627 | ||
628 | /* Function vect_pattern_recog_1 | |
629 | ||
630 | Input: | |
631 | PATTERN_RECOG_FUNC: A pointer to a function that detects a certain | |
632 | computation pattern. | |
633 | STMT: A stmt from which the pattern search should start. | |
634 | ||
635 | If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an | |
636 | expression that computes the same functionality and can be used to | |
637 | replace the sequence of stmts that are involved in the pattern. | |
638 | ||
639 | Output: | |
640 | This function checks if the expression returned by PATTERN_RECOG_FUNC is | |
641 | supported in vector form by the target. We use 'TYPE_IN' to obtain the | |
642 | relevant vector type. If 'TYPE_IN' is already a vector type, then this | |
643 | indicates that target support had already been checked by PATTERN_RECOG_FUNC. | |
644 | If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits | |
645 | to the available target pattern. | |
646 | ||
c0220ea4 | 647 | This function also does some bookkeeping, as explained in the documentation |
20f06221 DN |
648 | for vect_recog_pattern. */ |
649 | ||
650 | static void | |
651 | vect_pattern_recog_1 ( | |
44035081 | 652 | tree (* vect_recog_func) (tree, tree *, tree *), |
20f06221 DN |
653 | block_stmt_iterator si) |
654 | { | |
655 | tree stmt = bsi_stmt (si); | |
656 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
657 | stmt_vec_info pattern_stmt_info; | |
658 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | |
659 | tree pattern_expr; | |
660 | tree pattern_vectype; | |
661 | tree type_in, type_out; | |
662 | tree pattern_type; | |
663 | enum tree_code code; | |
664 | tree var, var_name; | |
665 | stmt_ann_t ann; | |
666 | ||
44035081 | 667 | pattern_expr = (* vect_recog_func) (stmt, &type_in, &type_out); |
20f06221 DN |
668 | if (!pattern_expr) |
669 | return; | |
670 | ||
671 | if (VECTOR_MODE_P (TYPE_MODE (type_in))) | |
672 | { | |
673 | /* No need to check target support (already checked by the pattern | |
674 | recognition function). */ | |
675 | pattern_vectype = type_in; | |
676 | } | |
677 | else | |
678 | { | |
679 | enum tree_code vec_mode; | |
680 | enum insn_code icode; | |
681 | optab optab; | |
682 | ||
683 | /* Check target support */ | |
684 | pattern_vectype = get_vectype_for_scalar_type (type_in); | |
03d3e953 IR |
685 | if (!pattern_vectype) |
686 | return; | |
687 | ||
71d46ca5 MM |
688 | optab = optab_for_tree_code (TREE_CODE (pattern_expr), pattern_vectype, |
689 | optab_default); | |
20f06221 DN |
690 | vec_mode = TYPE_MODE (pattern_vectype); |
691 | if (!optab | |
166cdb08 | 692 | || (icode = optab_handler (optab, vec_mode)->insn_code) == |
20f06221 DN |
693 | CODE_FOR_nothing |
694 | || (type_out | |
6887a8b9 SE |
695 | && (!get_vectype_for_scalar_type (type_out) |
696 | || (insn_data[icode].operand[0].mode != | |
697 | TYPE_MODE (get_vectype_for_scalar_type (type_out)))))) | |
20f06221 DN |
698 | return; |
699 | } | |
700 | ||
701 | /* Found a vectorizable pattern. */ | |
702 | if (vect_print_dump_info (REPORT_DETAILS)) | |
703 | { | |
704 | fprintf (vect_dump, "pattern recognized: "); | |
705 | print_generic_expr (vect_dump, pattern_expr, TDF_SLIM); | |
706 | } | |
707 | ||
708 | /* Mark the stmts that are involved in the pattern, | |
709 | create a new stmt to express the pattern and insert it. */ | |
710 | code = TREE_CODE (pattern_expr); | |
711 | pattern_type = TREE_TYPE (pattern_expr); | |
712 | var = create_tmp_var (pattern_type, "patt"); | |
f004ab02 | 713 | add_referenced_var (var); |
20f06221 | 714 | var_name = make_ssa_name (var, NULL_TREE); |
ebb07520 | 715 | pattern_expr = build_gimple_modify_stmt (var_name, pattern_expr); |
20f06221 DN |
716 | SSA_NAME_DEF_STMT (var_name) = pattern_expr; |
717 | bsi_insert_before (&si, pattern_expr, BSI_SAME_STMT); | |
718 | ann = stmt_ann (pattern_expr); | |
93c094b5 | 719 | set_stmt_info (ann, new_stmt_vec_info (pattern_expr, loop_vinfo)); |
20f06221 DN |
720 | pattern_stmt_info = vinfo_for_stmt (pattern_expr); |
721 | ||
722 | STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; | |
723 | STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info); | |
724 | STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; | |
725 | STMT_VINFO_IN_PATTERN_P (stmt_info) = true; | |
726 | STMT_VINFO_RELATED_STMT (stmt_info) = pattern_expr; | |
727 | ||
728 | return; | |
729 | } | |
730 | ||
731 | ||
732 | /* Function vect_pattern_recog | |
733 | ||
734 | Input: | |
735 | LOOP_VINFO - a struct_loop_info of a loop in which we want to look for | |
736 | computation idioms. | |
737 | ||
738 | Output - for each computation idiom that is detected we insert a new stmt | |
739 | that provides the same functionality and that can be vectorized. We | |
740 | also record some information in the struct_stmt_info of the relevant | |
741 | stmts, as explained below: | |
742 | ||
743 | At the entry to this function we have the following stmts, with the | |
744 | following initial value in the STMT_VINFO fields: | |
745 | ||
746 | stmt in_pattern_p related_stmt vec_stmt | |
747 | S1: a_i = .... - - - | |
748 | S2: a_2 = ..use(a_i).. - - - | |
749 | S3: a_1 = ..use(a_2).. - - - | |
750 | S4: a_0 = ..use(a_1).. - - - | |
751 | S5: ... = ..use(a_0).. - - - | |
752 | ||
753 | Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be | |
754 | represented by a single stmt. We then: | |
755 | - create a new stmt S6 that will replace the pattern. | |
756 | - insert the new stmt S6 before the last stmt in the pattern | |
757 | - fill in the STMT_VINFO fields as follows: | |
758 | ||
759 | in_pattern_p related_stmt vec_stmt | |
760 | S1: a_i = .... - - - | |
761 | S2: a_2 = ..use(a_i).. - - - | |
762 | S3: a_1 = ..use(a_2).. - - - | |
763 | > S6: a_new = .... - S4 - | |
764 | S4: a_0 = ..use(a_1).. true S6 - | |
765 | S5: ... = ..use(a_0).. - - - | |
766 | ||
767 | (the last stmt in the pattern (S4) and the new pattern stmt (S6) point | |
768 | to each other through the RELATED_STMT field). | |
769 | ||
770 | S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead | |
771 | of S4 because it will replace all its uses. Stmts {S1,S2,S3} will | |
772 | remain irrelevant unless used by stmts other than S4. | |
773 | ||
774 | If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} | |
c0220ea4 | 775 | (because they are marked as irrelevant). It will vectorize S6, and record |
20f06221 DN |
776 | a pointer to the new vector stmt VS6 both from S6 (as usual), and also |
777 | from S4. We do that so that when we get to vectorizing stmts that use the | |
778 | def of S4 (like S5 that uses a_0), we'll know where to take the relevant | |
779 | vector-def from. S4 will be skipped, and S5 will be vectorized as usual: | |
780 | ||
781 | in_pattern_p related_stmt vec_stmt | |
782 | S1: a_i = .... - - - | |
783 | S2: a_2 = ..use(a_i).. - - - | |
784 | S3: a_1 = ..use(a_2).. - - - | |
785 | > VS6: va_new = .... - - - | |
786 | S6: a_new = .... - S4 VS6 | |
787 | S4: a_0 = ..use(a_1).. true S6 VS6 | |
788 | > VS5: ... = ..vuse(va_new).. - - - | |
789 | S5: ... = ..use(a_0).. - - - | |
790 | ||
791 | DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used | |
792 | elsewhere), and we'll end up with: | |
793 | ||
794 | VS6: va_new = .... | |
795 | VS5: ... = ..vuse(va_new).. | |
796 | ||
797 | If vectorization does not succeed, DCE will clean S6 away (its def is | |
798 | not used), and we'll end up with the original sequence. | |
799 | */ | |
800 | ||
801 | void | |
802 | vect_pattern_recog (loop_vec_info loop_vinfo) | |
803 | { | |
804 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
805 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
806 | unsigned int nbbs = loop->num_nodes; | |
807 | block_stmt_iterator si; | |
808 | tree stmt; | |
809 | unsigned int i, j; | |
810 | tree (* vect_recog_func_ptr) (tree, tree *, tree *); | |
811 | ||
812 | if (vect_print_dump_info (REPORT_DETAILS)) | |
813 | fprintf (vect_dump, "=== vect_pattern_recog ==="); | |
814 | ||
815 | /* Scan through the loop stmts, applying the pattern recognition | |
816 | functions starting at each stmt visited: */ | |
817 | for (i = 0; i < nbbs; i++) | |
818 | { | |
819 | basic_block bb = bbs[i]; | |
820 | for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) | |
821 | { | |
822 | stmt = bsi_stmt (si); | |
823 | ||
824 | /* Scan over all generic vect_recog_xxx_pattern functions. */ | |
825 | for (j = 0; j < NUM_PATTERNS; j++) | |
826 | { | |
827 | vect_recog_func_ptr = vect_vect_recog_func_ptrs[j]; | |
828 | vect_pattern_recog_1 (vect_recog_func_ptr, si); | |
829 | } | |
830 | } | |
831 | } | |
832 | } |