]>
Commit | Line | Data |
---|---|---|
26277d41 | 1 | /* Lower complex number and vector operations to scalar operations. |
6de9cd9a DN |
2 | Copyright (C) 2004 Free Software Foundation, Inc. |
3 | ||
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by the | |
8 | Free Software Foundation; either version 2, or (at your option) any | |
9 | later version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING. If not, write to the Free | |
18 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
19 | 02111-1307, USA. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tree.h" | |
25 | #include "tm.h" | |
26277d41 PB |
26 | #include "rtl.h" |
27 | #include "expr.h" | |
28 | #include "insn-codes.h" | |
29 | #include "diagnostic.h" | |
30 | #include "optabs.h" | |
31 | #include "machmode.h" | |
32 | #include "langhooks.h" | |
6de9cd9a | 33 | #include "tree-flow.h" |
eadf906f | 34 | #include "tree-gimple.h" |
6de9cd9a DN |
35 | #include "tree-iterator.h" |
36 | #include "tree-pass.h" | |
37 | #include "flags.h" | |
38 | ||
39 | ||
6de9cd9a DN |
40 | /* Extract the real or imaginary part of a complex variable or constant. |
41 | Make sure that it's a proper gimple_val and gimplify it if not. | |
42 | Emit any new code before BSI. */ | |
43 | ||
44 | static tree | |
45 | extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p) | |
46 | { | |
47 | tree ret, inner_type; | |
48 | ||
49 | inner_type = TREE_TYPE (TREE_TYPE (t)); | |
50 | switch (TREE_CODE (t)) | |
51 | { | |
52 | case COMPLEX_CST: | |
53 | ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t)); | |
54 | break; | |
55 | ||
56 | case COMPLEX_EXPR: | |
57 | ret = TREE_OPERAND (t, imagpart_p); | |
58 | break; | |
59 | ||
60 | case VAR_DECL: | |
61 | case PARM_DECL: | |
62 | ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR), | |
63 | inner_type, t); | |
64 | break; | |
65 | ||
66 | default: | |
67 | abort (); | |
68 | } | |
69 | ||
70 | return gimplify_val (bsi, inner_type, ret); | |
71 | } | |
72 | ||
6de9cd9a DN |
73 | /* Update an assignment to a complex variable in place. */ |
74 | ||
75 | static void | |
76 | update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i) | |
77 | { | |
78 | tree stmt = bsi_stmt (*bsi); | |
79 | tree type; | |
80 | ||
6de9cd9a DN |
81 | if (TREE_CODE (stmt) == RETURN_EXPR) |
82 | stmt = TREE_OPERAND (stmt, 0); | |
83 | ||
84 | type = TREE_TYPE (TREE_OPERAND (stmt, 1)); | |
85 | TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i); | |
68b9f53b | 86 | modify_stmt (stmt); |
6de9cd9a DN |
87 | } |
88 | ||
89 | /* Expand complex addition to scalars: | |
90 | a + b = (ar + br) + i(ai + bi) | |
91 | a - b = (ar - br) + i(ai + bi) | |
92 | */ | |
93 | ||
94 | static void | |
95 | expand_complex_addition (block_stmt_iterator *bsi, tree inner_type, | |
96 | tree ar, tree ai, tree br, tree bi, | |
97 | enum tree_code code) | |
98 | { | |
99 | tree rr, ri; | |
100 | ||
26277d41 PB |
101 | rr = gimplify_build2 (bsi, code, inner_type, ar, br); |
102 | ri = gimplify_build2 (bsi, code, inner_type, ai, bi); | |
6de9cd9a DN |
103 | |
104 | update_complex_assignment (bsi, rr, ri); | |
105 | } | |
106 | ||
107 | /* Expand complex multiplication to scalars: | |
108 | a * b = (ar*br - ai*bi) + i(ar*bi + br*ai) | |
109 | */ | |
110 | ||
111 | static void | |
112 | expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type, | |
113 | tree ar, tree ai, tree br, tree bi) | |
114 | { | |
115 | tree t1, t2, t3, t4, rr, ri; | |
116 | ||
26277d41 PB |
117 | t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br); |
118 | t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi); | |
119 | t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi); | |
6de9cd9a DN |
120 | |
121 | /* Avoid expanding redundant multiplication for the common | |
122 | case of squaring a complex number. */ | |
123 | if (ar == br && ai == bi) | |
124 | t4 = t3; | |
125 | else | |
26277d41 | 126 | t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br); |
6de9cd9a | 127 | |
26277d41 PB |
128 | rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2); |
129 | ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4); | |
6de9cd9a DN |
130 | |
131 | update_complex_assignment (bsi, rr, ri); | |
132 | } | |
133 | ||
134 | /* Expand complex division to scalars, straightforward algorithm. | |
135 | a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t) | |
136 | t = br*br + bi*bi | |
137 | */ | |
138 | ||
139 | static void | |
140 | expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type, | |
141 | tree ar, tree ai, tree br, tree bi, | |
142 | enum tree_code code) | |
143 | { | |
144 | tree rr, ri, div, t1, t2, t3; | |
145 | ||
26277d41 PB |
146 | t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br); |
147 | t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi); | |
148 | div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2); | |
6de9cd9a | 149 | |
26277d41 PB |
150 | t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br); |
151 | t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi); | |
152 | t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2); | |
153 | rr = gimplify_build2 (bsi, code, inner_type, t3, div); | |
6de9cd9a | 154 | |
26277d41 PB |
155 | t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br); |
156 | t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi); | |
157 | t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2); | |
158 | ri = gimplify_build2 (bsi, code, inner_type, t3, div); | |
6de9cd9a DN |
159 | |
160 | update_complex_assignment (bsi, rr, ri); | |
161 | } | |
162 | ||
163 | /* Expand complex division to scalars, modified algorithm to minimize | |
164 | overflow with wide input ranges. */ | |
165 | ||
166 | static void | |
167 | expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type, | |
168 | tree ar, tree ai, tree br, tree bi, | |
169 | enum tree_code code) | |
170 | { | |
171 | tree rr, ri, ratio, div, t1, t2, min, max, cond; | |
172 | ||
173 | /* Examine |br| < |bi|, and branch. */ | |
26277d41 PB |
174 | t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br); |
175 | t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi); | |
6de9cd9a DN |
176 | cond = fold (build (LT_EXPR, boolean_type_node, t1, t2)); |
177 | STRIP_NOPS (cond); | |
178 | ||
179 | if (TREE_CONSTANT (cond)) | |
180 | { | |
181 | if (integer_zerop (cond)) | |
182 | min = bi, max = br; | |
183 | else | |
184 | min = br, max = bi; | |
185 | } | |
186 | else | |
187 | { | |
188 | basic_block bb_cond, bb_true, bb_false, bb_join; | |
189 | tree l1, l2, l3; | |
190 | edge e; | |
191 | ||
192 | l1 = create_artificial_label (); | |
193 | t1 = build (GOTO_EXPR, void_type_node, l1); | |
194 | l2 = create_artificial_label (); | |
195 | t2 = build (GOTO_EXPR, void_type_node, l2); | |
196 | cond = build (COND_EXPR, void_type_node, cond, t1, t2); | |
197 | bsi_insert_before (bsi, cond, BSI_SAME_STMT); | |
198 | ||
571325db AP |
199 | min = make_rename_temp (inner_type, NULL); |
200 | max = make_rename_temp (inner_type, NULL); | |
6de9cd9a DN |
201 | l3 = create_artificial_label (); |
202 | ||
203 | /* Split the original block, and create the TRUE and FALSE blocks. */ | |
204 | e = split_block (bsi->bb, cond); | |
205 | bb_cond = e->src; | |
206 | bb_join = e->dest; | |
207 | bb_true = create_empty_bb (bb_cond); | |
208 | bb_false = create_empty_bb (bb_true); | |
209 | ||
210 | /* Wire the blocks together. */ | |
211 | e->flags = EDGE_TRUE_VALUE; | |
212 | redirect_edge_succ (e, bb_true); | |
213 | make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE); | |
214 | make_edge (bb_true, bb_join, 0); | |
215 | make_edge (bb_false, bb_join, 0); | |
216 | ||
217 | /* Update dominance info. Note that bb_join's data was | |
218 | updated by split_block. */ | |
219 | if (dom_computed[CDI_DOMINATORS] >= DOM_CONS_OK) | |
220 | { | |
221 | set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond); | |
222 | set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond); | |
223 | } | |
224 | ||
225 | /* Compute min and max for TRUE block. */ | |
226 | *bsi = bsi_start (bb_true); | |
227 | t1 = build (LABEL_EXPR, void_type_node, l1); | |
228 | bsi_insert_after (bsi, t1, BSI_NEW_STMT); | |
229 | t1 = build (MODIFY_EXPR, inner_type, min, br); | |
230 | bsi_insert_after (bsi, t1, BSI_NEW_STMT); | |
231 | t1 = build (MODIFY_EXPR, inner_type, max, bi); | |
232 | bsi_insert_after (bsi, t1, BSI_NEW_STMT); | |
233 | ||
234 | /* Compute min and max for FALSE block. */ | |
235 | *bsi = bsi_start (bb_false); | |
236 | t1 = build (LABEL_EXPR, void_type_node, l2); | |
237 | bsi_insert_after (bsi, t1, BSI_NEW_STMT); | |
238 | t1 = build (MODIFY_EXPR, inner_type, min, bi); | |
239 | bsi_insert_after (bsi, t1, BSI_NEW_STMT); | |
240 | t1 = build (MODIFY_EXPR, inner_type, max, br); | |
241 | bsi_insert_after (bsi, t1, BSI_NEW_STMT); | |
242 | ||
243 | /* Insert the join label into the tail of the original block. */ | |
244 | *bsi = bsi_start (bb_join); | |
245 | t1 = build (LABEL_EXPR, void_type_node, l3); | |
246 | bsi_insert_before (bsi, t1, BSI_SAME_STMT); | |
247 | } | |
248 | ||
249 | /* Now we have MIN(|br|, |bi|) and MAX(|br|, |bi|). We now use the | |
250 | ratio min/max to scale both the dividend and divisor. */ | |
26277d41 | 251 | ratio = gimplify_build2 (bsi, code, inner_type, min, max); |
6de9cd9a DN |
252 | |
253 | /* Calculate the divisor: min*ratio + max. */ | |
26277d41 PB |
254 | t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, min, ratio); |
255 | div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, max); | |
6de9cd9a | 256 | |
26277d41 PB |
257 | /* Result is now ((ar + ai*ratio)/div) + i((ai - ar*ratio)/div). */ |
258 | t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio); | |
259 | t2 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, ar, t1); | |
260 | rr = gimplify_build2 (bsi, code, inner_type, t2, div); | |
6de9cd9a | 261 | |
26277d41 PB |
262 | t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio); |
263 | t2 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1); | |
264 | ri = gimplify_build2 (bsi, code, inner_type, t2, div); | |
6de9cd9a DN |
265 | |
266 | update_complex_assignment (bsi, rr, ri); | |
267 | } | |
268 | ||
269 | /* Expand complex division to scalars. */ | |
270 | ||
271 | static void | |
272 | expand_complex_division (block_stmt_iterator *bsi, tree inner_type, | |
273 | tree ar, tree ai, tree br, tree bi, | |
274 | enum tree_code code) | |
275 | { | |
276 | switch (flag_complex_divide_method) | |
277 | { | |
278 | case 0: | |
279 | /* straightforward implementation of complex divide acceptable. */ | |
280 | expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code); | |
281 | break; | |
282 | case 1: | |
283 | /* wide ranges of inputs must work for complex divide. */ | |
284 | expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code); | |
285 | break; | |
286 | default: | |
287 | /* C99-like requirements for complex divide (not yet implemented). */ | |
288 | abort (); | |
289 | } | |
290 | } | |
291 | ||
292 | /* Expand complex negation to scalars: | |
293 | -a = (-ar) + i(-ai) | |
294 | */ | |
295 | ||
296 | static void | |
297 | expand_complex_negation (block_stmt_iterator *bsi, tree inner_type, | |
298 | tree ar, tree ai) | |
299 | { | |
300 | tree rr, ri; | |
301 | ||
26277d41 PB |
302 | rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar); |
303 | ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai); | |
6de9cd9a DN |
304 | |
305 | update_complex_assignment (bsi, rr, ri); | |
306 | } | |
307 | ||
308 | /* Expand complex conjugate to scalars: | |
309 | ~a = (ar) + i(-ai) | |
310 | */ | |
311 | ||
312 | static void | |
313 | expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type, | |
314 | tree ar, tree ai) | |
315 | { | |
316 | tree ri; | |
317 | ||
26277d41 | 318 | ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai); |
6de9cd9a DN |
319 | |
320 | update_complex_assignment (bsi, ar, ri); | |
321 | } | |
322 | ||
323 | /* Expand complex comparison (EQ or NE only). */ | |
324 | ||
325 | static void | |
326 | expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai, | |
327 | tree br, tree bi, enum tree_code code) | |
328 | { | |
68b9f53b | 329 | tree cr, ci, cc, stmt, expr, type; |
6de9cd9a | 330 | |
26277d41 PB |
331 | cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br); |
332 | ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi); | |
333 | cc = gimplify_build2 (bsi, | |
334 | (code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR), | |
335 | boolean_type_node, cr, ci); | |
6de9cd9a | 336 | |
68b9f53b | 337 | stmt = expr = bsi_stmt (*bsi); |
6de9cd9a DN |
338 | |
339 | switch (TREE_CODE (stmt)) | |
340 | { | |
341 | case RETURN_EXPR: | |
68b9f53b | 342 | expr = TREE_OPERAND (stmt, 0); |
6de9cd9a DN |
343 | /* FALLTHRU */ |
344 | case MODIFY_EXPR: | |
68b9f53b AM |
345 | type = TREE_TYPE (TREE_OPERAND (expr, 1)); |
346 | TREE_OPERAND (expr, 1) = fold_convert (type, cc); | |
6de9cd9a DN |
347 | break; |
348 | case COND_EXPR: | |
349 | TREE_OPERAND (stmt, 0) = cc; | |
350 | break; | |
351 | default: | |
352 | abort (); | |
353 | } | |
68b9f53b AM |
354 | |
355 | modify_stmt (stmt); | |
6de9cd9a DN |
356 | } |
357 | ||
358 | /* Process one statement. If we identify a complex operation, expand it. */ | |
359 | ||
360 | static void | |
361 | expand_complex_operations_1 (block_stmt_iterator *bsi) | |
362 | { | |
363 | tree stmt = bsi_stmt (*bsi); | |
364 | tree rhs, type, inner_type; | |
365 | tree ac, ar, ai, bc, br, bi; | |
366 | enum tree_code code; | |
367 | ||
368 | switch (TREE_CODE (stmt)) | |
369 | { | |
370 | case RETURN_EXPR: | |
371 | stmt = TREE_OPERAND (stmt, 0); | |
372 | if (!stmt) | |
373 | return; | |
374 | if (TREE_CODE (stmt) != MODIFY_EXPR) | |
375 | return; | |
376 | /* FALLTHRU */ | |
377 | ||
378 | case MODIFY_EXPR: | |
379 | rhs = TREE_OPERAND (stmt, 1); | |
380 | break; | |
381 | ||
382 | case COND_EXPR: | |
383 | rhs = TREE_OPERAND (stmt, 0); | |
384 | break; | |
385 | ||
386 | default: | |
387 | return; | |
388 | } | |
389 | ||
390 | type = TREE_TYPE (rhs); | |
391 | code = TREE_CODE (rhs); | |
392 | ||
393 | /* Initial filter for operations we handle. */ | |
394 | switch (code) | |
395 | { | |
396 | case PLUS_EXPR: | |
397 | case MINUS_EXPR: | |
398 | case MULT_EXPR: | |
399 | case TRUNC_DIV_EXPR: | |
400 | case CEIL_DIV_EXPR: | |
401 | case FLOOR_DIV_EXPR: | |
402 | case ROUND_DIV_EXPR: | |
403 | case RDIV_EXPR: | |
404 | case NEGATE_EXPR: | |
405 | case CONJ_EXPR: | |
406 | if (TREE_CODE (type) != COMPLEX_TYPE) | |
407 | return; | |
408 | inner_type = TREE_TYPE (type); | |
409 | break; | |
410 | ||
411 | case EQ_EXPR: | |
412 | case NE_EXPR: | |
413 | inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1)); | |
414 | if (TREE_CODE (inner_type) != COMPLEX_TYPE) | |
415 | return; | |
416 | break; | |
417 | ||
418 | default: | |
419 | return; | |
420 | } | |
421 | ||
422 | /* Extract the components of the two complex values. Make sure and | |
423 | handle the common case of the same value used twice specially. */ | |
424 | ac = TREE_OPERAND (rhs, 0); | |
425 | ar = extract_component (bsi, ac, 0); | |
426 | ai = extract_component (bsi, ac, 1); | |
427 | ||
428 | if (TREE_CODE_CLASS (code) == '1') | |
429 | bc = br = bi = NULL; | |
430 | else | |
431 | { | |
432 | bc = TREE_OPERAND (rhs, 1); | |
433 | if (ac == bc) | |
434 | br = ar, bi = ai; | |
435 | else | |
436 | { | |
437 | br = extract_component (bsi, bc, 0); | |
438 | bi = extract_component (bsi, bc, 1); | |
439 | } | |
440 | } | |
441 | ||
442 | switch (code) | |
443 | { | |
444 | case PLUS_EXPR: | |
445 | case MINUS_EXPR: | |
446 | expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code); | |
447 | break; | |
448 | ||
449 | case MULT_EXPR: | |
450 | expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi); | |
451 | break; | |
452 | ||
453 | case TRUNC_DIV_EXPR: | |
454 | case CEIL_DIV_EXPR: | |
455 | case FLOOR_DIV_EXPR: | |
456 | case ROUND_DIV_EXPR: | |
457 | case RDIV_EXPR: | |
458 | expand_complex_division (bsi, inner_type, ar, ai, br, bi, code); | |
459 | break; | |
460 | ||
461 | case NEGATE_EXPR: | |
462 | expand_complex_negation (bsi, inner_type, ar, ai); | |
463 | break; | |
464 | ||
465 | case CONJ_EXPR: | |
466 | expand_complex_conjugate (bsi, inner_type, ar, ai); | |
467 | break; | |
468 | ||
469 | case EQ_EXPR: | |
470 | case NE_EXPR: | |
471 | expand_complex_comparison (bsi, ar, ai, br, bi, code); | |
472 | break; | |
473 | ||
474 | default: | |
475 | abort (); | |
476 | } | |
477 | } | |
26277d41 PB |
478 | \f |
479 | /* Build a constant of type TYPE, made of VALUE's bits replicated | |
480 | every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ | |
481 | static tree | |
482 | build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) | |
483 | { | |
484 | int width = tree_low_cst (TYPE_SIZE (inner_type), 1); | |
485 | int n = HOST_BITS_PER_WIDE_INT / width; | |
486 | unsigned HOST_WIDE_INT low, high, mask; | |
487 | tree ret; | |
488 | ||
489 | if (n == 0) | |
490 | abort (); | |
491 | ||
492 | if (width == HOST_BITS_PER_WIDE_INT) | |
493 | low = value; | |
494 | else | |
495 | { | |
496 | mask = ((HOST_WIDE_INT)1 << width) - 1; | |
497 | low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); | |
498 | } | |
499 | ||
500 | if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) | |
501 | low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; | |
502 | else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) | |
503 | high = 0; | |
504 | else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) | |
505 | high = low; | |
506 | else | |
507 | abort (); | |
508 | ||
4a90aeeb | 509 | ret = build_int_cst (type, low, high); |
26277d41 PB |
510 | return ret; |
511 | } | |
6de9cd9a | 512 | |
26277d41 PB |
513 | /* Return a suitable vector types made of SUBPARTS units each of mode |
514 | "word_mode" (the global variable). */ | |
515 | static tree | |
516 | build_word_mode_vector_type (int nunits) | |
517 | { | |
518 | static tree innertype; | |
519 | static tree last; | |
520 | static int last_nunits; | |
521 | ||
522 | if (!innertype) | |
523 | innertype = lang_hooks.types.type_for_mode (word_mode, 1); | |
524 | else if (last_nunits == nunits) | |
525 | return last; | |
526 | ||
527 | /* We build a new type, but we canonicalize it nevertheless, | |
528 | because it still saves some memory. */ | |
529 | last_nunits = nunits; | |
530 | last = type_hash_canon (nunits, build_vector_type (innertype, nunits)); | |
531 | return last; | |
532 | } | |
533 | ||
534 | typedef tree (*elem_op_func) (block_stmt_iterator *, | |
535 | tree, tree, tree, tree, tree, enum tree_code); | |
536 | ||
537 | static inline tree | |
538 | tree_vec_extract (block_stmt_iterator *bsi, tree type, | |
539 | tree t, tree bitsize, tree bitpos) | |
540 | { | |
541 | if (bitpos) | |
542 | return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos); | |
543 | else | |
544 | return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t); | |
545 | } | |
546 | ||
547 | static tree | |
548 | do_unop (block_stmt_iterator *bsi, tree inner_type, tree a, | |
549 | tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, | |
550 | enum tree_code code) | |
551 | { | |
552 | a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); | |
553 | return gimplify_build1 (bsi, code, inner_type, a); | |
554 | } | |
555 | ||
556 | static tree | |
557 | do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b, | |
558 | tree bitpos, tree bitsize, enum tree_code code) | |
559 | { | |
560 | a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos); | |
561 | b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos); | |
562 | return gimplify_build2 (bsi, code, inner_type, a, b); | |
563 | } | |
564 | ||
565 | /* Expand vector addition to scalars. This does bit twiddling | |
566 | in order to increase parallelism: | |
567 | ||
568 | a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ | |
569 | (a ^ b) & 0x80808080 | |
570 | ||
571 | a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ | |
572 | (a ^ ~b) & 0x80808080 | |
573 | ||
574 | -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) | |
575 | ||
576 | This optimization should be done only if 4 vector items or more | |
577 | fit into a word. */ | |
578 | static tree | |
579 | do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b, | |
580 | tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, | |
581 | enum tree_code code) | |
582 | { | |
583 | tree inner_type = TREE_TYPE (TREE_TYPE (a)); | |
584 | unsigned HOST_WIDE_INT max; | |
585 | tree low_bits, high_bits, a_low, b_low, result_low, signs; | |
586 | ||
587 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
588 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
589 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
590 | ||
591 | a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos); | |
592 | b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); | |
593 | ||
594 | signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b); | |
595 | b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); | |
596 | if (code == PLUS_EXPR) | |
597 | a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits); | |
598 | else | |
599 | { | |
600 | a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits); | |
601 | signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs); | |
602 | } | |
603 | ||
604 | signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); | |
605 | result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low); | |
606 | return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
607 | } | |
608 | ||
609 | static tree | |
610 | do_negate (block_stmt_iterator *bsi, tree word_type, tree b, | |
611 | tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, | |
612 | tree bitsize ATTRIBUTE_UNUSED, | |
613 | enum tree_code code ATTRIBUTE_UNUSED) | |
614 | { | |
615 | tree inner_type = TREE_TYPE (TREE_TYPE (b)); | |
616 | HOST_WIDE_INT max; | |
617 | tree low_bits, high_bits, b_low, result_low, signs; | |
618 | ||
619 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
620 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
621 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
622 | ||
623 | b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos); | |
624 | ||
625 | b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits); | |
626 | signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b); | |
627 | signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits); | |
628 | result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low); | |
629 | return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
630 | } | |
631 | ||
632 | /* Expand a vector operation to scalars, by using many operations | |
633 | whose type is the vector type's inner type. */ | |
634 | static tree | |
635 | expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f, | |
636 | tree type, tree inner_type, | |
637 | tree a, tree b, enum tree_code code) | |
638 | { | |
639 | tree head, *chain = &head; | |
640 | tree part_width = TYPE_SIZE (inner_type); | |
641 | tree index = bitsize_int (0); | |
642 | int nunits = TYPE_VECTOR_SUBPARTS (type); | |
643 | int delta = tree_low_cst (part_width, 1) | |
644 | / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); | |
645 | int i; | |
646 | ||
647 | for (i = 0; i < nunits; | |
648 | i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) | |
649 | { | |
650 | tree result = f (bsi, inner_type, a, b, index, part_width, code); | |
651 | *chain = tree_cons (NULL_TREE, result, NULL_TREE); | |
652 | chain = &TREE_CHAIN (*chain); | |
653 | } | |
654 | ||
655 | return build1 (CONSTRUCTOR, type, head); | |
656 | } | |
657 | ||
658 | /* Expand a vector operation to scalars with the freedom to use | |
659 | a scalar integer type, or to use a different size for the items | |
660 | in the vector type. */ | |
661 | static tree | |
662 | expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type, | |
663 | tree a, tree b, | |
664 | enum tree_code code) | |
665 | { | |
666 | tree result, compute_type; | |
667 | enum machine_mode mode; | |
668 | int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; | |
669 | ||
670 | /* We have three strategies. If the type is already correct, just do | |
671 | the operation an element at a time. Else, if the vector is wider than | |
672 | one word, do it a word at a time; finally, if the vector is smaller | |
673 | than one word, do it as a scalar. */ | |
674 | if (TYPE_MODE (TREE_TYPE (type)) == word_mode) | |
675 | return expand_vector_piecewise (bsi, f, | |
676 | type, TREE_TYPE (type), | |
677 | a, b, code); | |
678 | else if (n_words > 1) | |
679 | { | |
680 | tree word_type = build_word_mode_vector_type (n_words); | |
681 | result = expand_vector_piecewise (bsi, f, | |
682 | word_type, TREE_TYPE (word_type), | |
683 | a, b, code); | |
684 | result = gimplify_val (bsi, word_type, result); | |
685 | } | |
686 | else | |
687 | { | |
688 | /* Use a single scalar operation with a mode no wider than word_mode. */ | |
689 | mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); | |
690 | compute_type = lang_hooks.types.type_for_mode (mode, 1); | |
691 | result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); | |
692 | } | |
693 | ||
694 | return build1 (VIEW_CONVERT_EXPR, type, result); | |
695 | } | |
696 | ||
697 | /* Expand a vector operation to scalars; for integer types we can use | |
698 | special bit twiddling tricks to do the sums a word at a time, using | |
699 | function F_PARALLEL instead of F. These tricks are done only if | |
700 | they can process at least four items, that is, only if the vector | |
701 | holds at least four items and if a word can hold four items. */ | |
702 | static tree | |
703 | expand_vector_addition (block_stmt_iterator *bsi, | |
704 | elem_op_func f, elem_op_func f_parallel, | |
705 | tree type, tree a, tree b, enum tree_code code) | |
706 | { | |
707 | int parts_per_word = UNITS_PER_WORD | |
708 | / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); | |
709 | ||
710 | if (INTEGRAL_TYPE_P (TREE_TYPE (type)) | |
711 | && parts_per_word >= 4 | |
712 | && TYPE_VECTOR_SUBPARTS (type) >= 4) | |
713 | return expand_vector_parallel (bsi, f_parallel, | |
714 | type, a, b, code); | |
715 | else | |
716 | return expand_vector_piecewise (bsi, f, | |
717 | type, TREE_TYPE (type), | |
718 | a, b, code); | |
719 | } | |
720 | ||
721 | /* Return a type for the widest vector mode whose components are of mode | |
722 | INNER_MODE, or NULL_TREE if none is found. */ | |
723 | static tree | |
724 | type_for_widest_vector_mode (enum machine_mode inner_mode, optab op) | |
725 | { | |
726 | enum machine_mode best_mode = VOIDmode, mode; | |
727 | int best_nunits = 0; | |
728 | ||
729 | if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT) | |
730 | mode = MIN_MODE_VECTOR_FLOAT; | |
731 | else | |
732 | mode = MIN_MODE_VECTOR_INT; | |
733 | ||
734 | for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) | |
735 | if (GET_MODE_INNER (mode) == inner_mode | |
736 | && GET_MODE_NUNITS (mode) > best_nunits | |
737 | && op->handlers[mode].insn_code != CODE_FOR_nothing) | |
738 | best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); | |
739 | ||
740 | if (best_mode == VOIDmode) | |
741 | return NULL_TREE; | |
742 | else | |
743 | return lang_hooks.types.type_for_mode (best_mode, 1); | |
744 | } | |
745 | ||
746 | /* Process one statement. If we identify a vector operation, expand it. */ | |
747 | ||
748 | static void | |
749 | expand_vector_operations_1 (block_stmt_iterator *bsi) | |
750 | { | |
751 | tree stmt = bsi_stmt (*bsi); | |
752 | tree *p_rhs, rhs, type, compute_type; | |
753 | enum tree_code code; | |
754 | enum machine_mode compute_mode; | |
755 | optab op; | |
756 | ||
757 | switch (TREE_CODE (stmt)) | |
758 | { | |
759 | case RETURN_EXPR: | |
760 | stmt = TREE_OPERAND (stmt, 0); | |
761 | if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR) | |
762 | return; | |
763 | ||
764 | /* FALLTHRU */ | |
765 | ||
766 | case MODIFY_EXPR: | |
767 | p_rhs = &TREE_OPERAND (stmt, 1); | |
768 | rhs = *p_rhs; | |
769 | break; | |
770 | ||
771 | default: | |
772 | return; | |
773 | } | |
774 | ||
775 | type = TREE_TYPE (rhs); | |
776 | if (TREE_CODE (type) != VECTOR_TYPE) | |
777 | return; | |
778 | ||
779 | code = TREE_CODE (rhs); | |
780 | if (TREE_CODE_CLASS (code) != '1' | |
781 | && TREE_CODE_CLASS (code) != '2') | |
782 | return; | |
783 | ||
784 | if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR) | |
785 | return; | |
786 | ||
787 | if (code == CONVERT_EXPR) | |
788 | abort (); | |
789 | ||
790 | op = optab_for_tree_code (code, type); | |
791 | ||
792 | /* Optabs will try converting a negation into a subtraction, so | |
793 | look for it as well. TODO: negation of floating-point vectors | |
794 | might be turned into an exclusive OR toggling the sign bit. */ | |
795 | if (op == NULL | |
796 | && code == NEGATE_EXPR | |
797 | && INTEGRAL_TYPE_P (TREE_TYPE (type))) | |
798 | op = optab_for_tree_code (MINUS_EXPR, type); | |
799 | ||
800 | /* For very wide vectors, try using a smaller vector mode. */ | |
801 | compute_type = type; | |
802 | if (TYPE_MODE (type) == BLKmode && op) | |
803 | { | |
804 | tree vector_compute_type | |
805 | = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op); | |
806 | if (vector_compute_type != NULL_TREE) | |
807 | compute_type = vector_compute_type; | |
808 | } | |
809 | ||
810 | compute_mode = TYPE_MODE (compute_type); | |
811 | ||
812 | /* If we are breaking a BLKmode vector into smaller pieces, | |
813 | type_for_widest_vector_mode has already looked into the optab, | |
814 | so skip these checks. */ | |
815 | if (compute_type == type) | |
816 | { | |
817 | if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT | |
818 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT) | |
819 | && op != NULL | |
820 | && op->handlers[compute_mode].insn_code != CODE_FOR_nothing) | |
821 | return; | |
822 | else | |
823 | { | |
824 | /* There is no operation in hardware, so fall back to scalars. */ | |
825 | compute_type = TREE_TYPE (type); | |
826 | compute_mode = TYPE_MODE (compute_type); | |
827 | } | |
828 | } | |
829 | ||
830 | /* If the compute mode is not a vector mode (hence we are decomposing | |
831 | a BLKmode vector to smaller, hardware-supported vectors), we may | |
832 | want to expand the operations in parallel. */ | |
833 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
834 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT) | |
835 | switch (code) | |
836 | { | |
837 | case PLUS_EXPR: | |
838 | case MINUS_EXPR: | |
839 | if (TYPE_TRAP_SIGNED (type)) | |
840 | break; | |
841 | ||
842 | *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type, | |
843 | TREE_OPERAND (rhs, 0), | |
844 | TREE_OPERAND (rhs, 1), code); | |
845 | modify_stmt (bsi_stmt (*bsi)); | |
846 | return; | |
847 | ||
848 | case NEGATE_EXPR: | |
849 | if (TYPE_TRAP_SIGNED (type)) | |
850 | break; | |
851 | ||
852 | *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type, | |
853 | TREE_OPERAND (rhs, 0), | |
854 | NULL_TREE, code); | |
855 | modify_stmt (bsi_stmt (*bsi)); | |
856 | return; | |
857 | ||
858 | case BIT_AND_EXPR: | |
859 | case BIT_IOR_EXPR: | |
860 | case BIT_XOR_EXPR: | |
861 | *p_rhs = expand_vector_parallel (bsi, do_binop, type, | |
862 | TREE_OPERAND (rhs, 0), | |
863 | TREE_OPERAND (rhs, 1), code); | |
864 | modify_stmt (bsi_stmt (*bsi)); | |
865 | return; | |
866 | ||
867 | case BIT_NOT_EXPR: | |
868 | *p_rhs = expand_vector_parallel (bsi, do_unop, type, | |
869 | TREE_OPERAND (rhs, 0), | |
870 | NULL_TREE, code); | |
871 | modify_stmt (bsi_stmt (*bsi)); | |
872 | return; | |
873 | ||
874 | default: | |
875 | break; | |
876 | } | |
877 | ||
878 | if (TREE_CODE_CLASS (code) == '1') | |
879 | *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type, | |
880 | TREE_OPERAND (rhs, 0), | |
881 | NULL_TREE, code); | |
882 | else | |
883 | *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type, | |
884 | TREE_OPERAND (rhs, 0), | |
885 | TREE_OPERAND (rhs, 1), code); | |
886 | ||
887 | modify_stmt (bsi_stmt (*bsi)); | |
888 | } | |
889 | \f | |
890 | static void | |
891 | expand_vector_operations (void) | |
892 | { | |
893 | block_stmt_iterator bsi; | |
894 | basic_block bb; | |
895 | ||
896 | FOR_EACH_BB (bb) | |
897 | { | |
898 | for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) | |
899 | expand_vector_operations_1 (&bsi); | |
900 | } | |
901 | } | |
6de9cd9a DN |
902 | |
903 | static void | |
26277d41 | 904 | tree_lower_operations (void) |
6de9cd9a DN |
905 | { |
906 | int old_last_basic_block = last_basic_block; | |
907 | block_stmt_iterator bsi; | |
908 | basic_block bb; | |
909 | ||
910 | FOR_EACH_BB (bb) | |
911 | { | |
912 | if (bb->index >= old_last_basic_block) | |
913 | continue; | |
914 | for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) | |
26277d41 PB |
915 | { |
916 | expand_complex_operations_1 (&bsi); | |
917 | expand_vector_operations_1 (&bsi); | |
918 | } | |
6de9cd9a DN |
919 | } |
920 | } | |
921 | ||
26277d41 PB |
922 | |
923 | struct tree_opt_pass pass_lower_vector_ssa = | |
6de9cd9a | 924 | { |
26277d41 | 925 | "vector", /* name */ |
6de9cd9a | 926 | NULL, /* gate */ |
26277d41 | 927 | expand_vector_operations, /* execute */ |
6de9cd9a DN |
928 | NULL, /* sub */ |
929 | NULL, /* next */ | |
930 | 0, /* static_pass_number */ | |
931 | 0, /* tv_id */ | |
932 | PROP_cfg, /* properties_required */ | |
933 | 0, /* properties_provided */ | |
934 | 0, /* properties_destroyed */ | |
935 | 0, /* todo_flags_start */ | |
26277d41 | 936 | TODO_dump_func | TODO_rename_vars /* todo_flags_finish */ |
6de9cd9a | 937 | | TODO_ggc_collect | TODO_verify_ssa |
26277d41 PB |
938 | | TODO_verify_stmts | TODO_verify_flow |
939 | }; | |
940 | ||
941 | struct tree_opt_pass pass_pre_expand = | |
942 | { | |
943 | "oplower", /* name */ | |
944 | 0, /* gate */ | |
945 | tree_lower_operations, /* execute */ | |
946 | NULL, /* sub */ | |
947 | NULL, /* next */ | |
948 | 0, /* static_pass_number */ | |
949 | 0, /* tv_id */ | |
950 | PROP_cfg, /* properties_required */ | |
951 | 0, /* properties_provided */ | |
952 | 0, /* properties_destroyed */ | |
953 | 0, /* todo_flags_start */ | |
954 | TODO_dump_func | TODO_ggc_collect | |
955 | | TODO_verify_stmts /* todo_flags_finish */ | |
6de9cd9a | 956 | }; |