]>
Commit | Line | Data |
---|---|---|
2b725155 | 1 | /* Lower vector operations to scalar operations. |
eeef0e45 | 2 | Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc. |
2b725155 RH |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by the | |
9dcd6f09 | 8 | Free Software Foundation; either version 3, or (at your option) any |
2b725155 RH |
9 | later version. |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
2b725155 RH |
19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tree.h" | |
24 | #include "tm.h" | |
25 | #include "rtl.h" | |
26 | #include "expr.h" | |
27 | #include "insn-codes.h" | |
28 | #include "diagnostic.h" | |
29 | #include "optabs.h" | |
30 | #include "machmode.h" | |
31 | #include "langhooks.h" | |
32 | #include "tree-flow.h" | |
726a989a | 33 | #include "gimple.h" |
2b725155 RH |
34 | #include "tree-iterator.h" |
35 | #include "tree-pass.h" | |
36 | #include "flags.h" | |
37 | #include "ggc.h" | |
38 | ||
39 | ||
40 | /* Build a constant of type TYPE, made of VALUE's bits replicated | |
41 | every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ | |
42 | static tree | |
43 | build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) | |
44 | { | |
45 | int width = tree_low_cst (TYPE_SIZE (inner_type), 1); | |
46 | int n = HOST_BITS_PER_WIDE_INT / width; | |
47 | unsigned HOST_WIDE_INT low, high, mask; | |
48 | tree ret; | |
49 | ||
50 | gcc_assert (n); | |
51 | ||
52 | if (width == HOST_BITS_PER_WIDE_INT) | |
53 | low = value; | |
54 | else | |
55 | { | |
56 | mask = ((HOST_WIDE_INT)1 << width) - 1; | |
57 | low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); | |
58 | } | |
59 | ||
60 | if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) | |
61 | low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; | |
62 | else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) | |
63 | high = 0; | |
64 | else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) | |
65 | high = low; | |
66 | else | |
67 | gcc_unreachable (); | |
68 | ||
69 | ret = build_int_cst_wide (type, low, high); | |
70 | return ret; | |
71 | } | |
72 | ||
73 | static GTY(()) tree vector_inner_type; | |
74 | static GTY(()) tree vector_last_type; | |
75 | static GTY(()) int vector_last_nunits; | |
76 | ||
77 | /* Return a suitable vector types made of SUBPARTS units each of mode | |
78 | "word_mode" (the global variable). */ | |
79 | static tree | |
80 | build_word_mode_vector_type (int nunits) | |
81 | { | |
82 | if (!vector_inner_type) | |
83 | vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1); | |
84 | else if (vector_last_nunits == nunits) | |
85 | { | |
86 | gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE); | |
87 | return vector_last_type; | |
88 | } | |
89 | ||
90 | /* We build a new type, but we canonicalize it nevertheless, | |
91 | because it still saves some memory. */ | |
92 | vector_last_nunits = nunits; | |
93 | vector_last_type = type_hash_canon (nunits, | |
94 | build_vector_type (vector_inner_type, | |
95 | nunits)); | |
96 | return vector_last_type; | |
97 | } | |
98 | ||
726a989a | 99 | typedef tree (*elem_op_func) (gimple_stmt_iterator *, |
2b725155 RH |
100 | tree, tree, tree, tree, tree, enum tree_code); |
101 | ||
102 | static inline tree | |
726a989a | 103 | tree_vec_extract (gimple_stmt_iterator *gsi, tree type, |
2b725155 RH |
104 | tree t, tree bitsize, tree bitpos) |
105 | { | |
106 | if (bitpos) | |
726a989a | 107 | return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); |
2b725155 | 108 | else |
726a989a | 109 | return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); |
2b725155 RH |
110 | } |
111 | ||
112 | static tree | |
726a989a | 113 | do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, |
2b725155 RH |
114 | tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, |
115 | enum tree_code code) | |
116 | { | |
726a989a RB |
117 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
118 | return gimplify_build1 (gsi, code, inner_type, a); | |
2b725155 RH |
119 | } |
120 | ||
121 | static tree | |
726a989a | 122 | do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, |
2b725155 RH |
123 | tree bitpos, tree bitsize, enum tree_code code) |
124 | { | |
726a989a RB |
125 | a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
126 | b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
127 | return gimplify_build2 (gsi, code, inner_type, a, b); | |
2b725155 RH |
128 | } |
129 | ||
130 | /* Expand vector addition to scalars. This does bit twiddling | |
131 | in order to increase parallelism: | |
132 | ||
133 | a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^ | |
134 | (a ^ b) & 0x80808080 | |
135 | ||
136 | a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^ | |
137 | (a ^ ~b) & 0x80808080 | |
138 | ||
139 | -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080) | |
140 | ||
141 | This optimization should be done only if 4 vector items or more | |
142 | fit into a word. */ | |
143 | static tree | |
726a989a | 144 | do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, |
2b725155 RH |
145 | tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, |
146 | enum tree_code code) | |
147 | { | |
148 | tree inner_type = TREE_TYPE (TREE_TYPE (a)); | |
149 | unsigned HOST_WIDE_INT max; | |
150 | tree low_bits, high_bits, a_low, b_low, result_low, signs; | |
151 | ||
152 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
153 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
154 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
155 | ||
726a989a RB |
156 | a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos); |
157 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); | |
2b725155 | 158 | |
726a989a RB |
159 | signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b); |
160 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); | |
2b725155 | 161 | if (code == PLUS_EXPR) |
726a989a | 162 | a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits); |
2b725155 RH |
163 | else |
164 | { | |
726a989a RB |
165 | a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits); |
166 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs); | |
2b725155 RH |
167 | } |
168 | ||
726a989a RB |
169 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); |
170 | result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low); | |
171 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
172 | } |
173 | ||
174 | static tree | |
726a989a | 175 | do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, |
2b725155 RH |
176 | tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, |
177 | tree bitsize ATTRIBUTE_UNUSED, | |
178 | enum tree_code code ATTRIBUTE_UNUSED) | |
179 | { | |
180 | tree inner_type = TREE_TYPE (TREE_TYPE (b)); | |
181 | HOST_WIDE_INT max; | |
182 | tree low_bits, high_bits, b_low, result_low, signs; | |
183 | ||
184 | max = GET_MODE_MASK (TYPE_MODE (inner_type)); | |
185 | low_bits = build_replicated_const (word_type, inner_type, max >> 1); | |
186 | high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1)); | |
187 | ||
726a989a | 188 | b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos); |
2b725155 | 189 | |
726a989a RB |
190 | b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits); |
191 | signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b); | |
192 | signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits); | |
193 | result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low); | |
194 | return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs); | |
2b725155 RH |
195 | } |
196 | ||
197 | /* Expand a vector operation to scalars, by using many operations | |
198 | whose type is the vector type's inner type. */ | |
199 | static tree | |
726a989a | 200 | expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, |
2b725155 RH |
201 | tree type, tree inner_type, |
202 | tree a, tree b, enum tree_code code) | |
203 | { | |
4038c495 | 204 | VEC(constructor_elt,gc) *v; |
2b725155 RH |
205 | tree part_width = TYPE_SIZE (inner_type); |
206 | tree index = bitsize_int (0); | |
207 | int nunits = TYPE_VECTOR_SUBPARTS (type); | |
208 | int delta = tree_low_cst (part_width, 1) | |
209 | / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); | |
210 | int i; | |
211 | ||
4038c495 | 212 | v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta); |
2b725155 RH |
213 | for (i = 0; i < nunits; |
214 | i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) | |
215 | { | |
726a989a | 216 | tree result = f (gsi, inner_type, a, b, index, part_width, code); |
4038c495 GB |
217 | constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL); |
218 | ce->index = NULL_TREE; | |
219 | ce->value = result; | |
2b725155 RH |
220 | } |
221 | ||
4038c495 | 222 | return build_constructor (type, v); |
2b725155 RH |
223 | } |
224 | ||
225 | /* Expand a vector operation to scalars with the freedom to use | |
226 | a scalar integer type, or to use a different size for the items | |
227 | in the vector type. */ | |
228 | static tree | |
726a989a | 229 | expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, |
2b725155 RH |
230 | tree a, tree b, |
231 | enum tree_code code) | |
232 | { | |
233 | tree result, compute_type; | |
234 | enum machine_mode mode; | |
235 | int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; | |
236 | ||
237 | /* We have three strategies. If the type is already correct, just do | |
238 | the operation an element at a time. Else, if the vector is wider than | |
239 | one word, do it a word at a time; finally, if the vector is smaller | |
240 | than one word, do it as a scalar. */ | |
241 | if (TYPE_MODE (TREE_TYPE (type)) == word_mode) | |
726a989a | 242 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
243 | type, TREE_TYPE (type), |
244 | a, b, code); | |
245 | else if (n_words > 1) | |
246 | { | |
247 | tree word_type = build_word_mode_vector_type (n_words); | |
726a989a | 248 | result = expand_vector_piecewise (gsi, f, |
2b725155 RH |
249 | word_type, TREE_TYPE (word_type), |
250 | a, b, code); | |
726a989a RB |
251 | result = force_gimple_operand_gsi (gsi, result, true, NULL, true, |
252 | GSI_SAME_STMT); | |
2b725155 RH |
253 | } |
254 | else | |
255 | { | |
256 | /* Use a single scalar operation with a mode no wider than word_mode. */ | |
257 | mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); | |
258 | compute_type = lang_hooks.types.type_for_mode (mode, 1); | |
726a989a | 259 | result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); |
2b725155 RH |
260 | } |
261 | ||
262 | return result; | |
263 | } | |
264 | ||
265 | /* Expand a vector operation to scalars; for integer types we can use | |
266 | special bit twiddling tricks to do the sums a word at a time, using | |
267 | function F_PARALLEL instead of F. These tricks are done only if | |
268 | they can process at least four items, that is, only if the vector | |
269 | holds at least four items and if a word can hold four items. */ | |
270 | static tree | |
726a989a | 271 | expand_vector_addition (gimple_stmt_iterator *gsi, |
2b725155 RH |
272 | elem_op_func f, elem_op_func f_parallel, |
273 | tree type, tree a, tree b, enum tree_code code) | |
274 | { | |
275 | int parts_per_word = UNITS_PER_WORD | |
276 | / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); | |
277 | ||
278 | if (INTEGRAL_TYPE_P (TREE_TYPE (type)) | |
279 | && parts_per_word >= 4 | |
280 | && TYPE_VECTOR_SUBPARTS (type) >= 4) | |
726a989a | 281 | return expand_vector_parallel (gsi, f_parallel, |
2b725155 RH |
282 | type, a, b, code); |
283 | else | |
726a989a | 284 | return expand_vector_piecewise (gsi, f, |
2b725155 RH |
285 | type, TREE_TYPE (type), |
286 | a, b, code); | |
287 | } | |
288 | ||
289 | static tree | |
726a989a RB |
290 | expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, |
291 | gimple assign, enum tree_code code) | |
2b725155 RH |
292 | { |
293 | enum machine_mode compute_mode = TYPE_MODE (compute_type); | |
294 | ||
295 | /* If the compute mode is not a vector mode (hence we are not decomposing | |
296 | a BLKmode vector to smaller, hardware-supported vectors), we may want | |
297 | to expand the operations in parallel. */ | |
298 | if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
325217ed CF |
299 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT |
300 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
301 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
302 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
303 | && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
2b725155 RH |
304 | switch (code) |
305 | { | |
306 | case PLUS_EXPR: | |
307 | case MINUS_EXPR: | |
eeef0e45 | 308 | if (!TYPE_OVERFLOW_TRAPS (type)) |
726a989a RB |
309 | return expand_vector_addition (gsi, do_binop, do_plus_minus, type, |
310 | gimple_assign_rhs1 (assign), | |
311 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
312 | break; |
313 | ||
314 | case NEGATE_EXPR: | |
eeef0e45 | 315 | if (!TYPE_OVERFLOW_TRAPS (type)) |
726a989a RB |
316 | return expand_vector_addition (gsi, do_unop, do_negate, type, |
317 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
318 | NULL_TREE, code); |
319 | break; | |
320 | ||
321 | case BIT_AND_EXPR: | |
322 | case BIT_IOR_EXPR: | |
323 | case BIT_XOR_EXPR: | |
726a989a RB |
324 | return expand_vector_parallel (gsi, do_binop, type, |
325 | gimple_assign_rhs1 (assign), | |
326 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
327 | |
328 | case BIT_NOT_EXPR: | |
726a989a RB |
329 | return expand_vector_parallel (gsi, do_unop, type, |
330 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
331 | NULL_TREE, code); |
332 | ||
333 | default: | |
334 | break; | |
335 | } | |
336 | ||
337 | if (TREE_CODE_CLASS (code) == tcc_unary) | |
726a989a RB |
338 | return expand_vector_piecewise (gsi, do_unop, type, compute_type, |
339 | gimple_assign_rhs1 (assign), | |
2b725155 RH |
340 | NULL_TREE, code); |
341 | else | |
726a989a RB |
342 | return expand_vector_piecewise (gsi, do_binop, type, compute_type, |
343 | gimple_assign_rhs1 (assign), | |
344 | gimple_assign_rhs2 (assign), code); | |
2b725155 RH |
345 | } |
346 | \f | |
347 | /* Return a type for the widest vector mode whose components are of mode | |
325217ed CF |
348 | INNER_MODE, or NULL_TREE if none is found. |
349 | SATP is true for saturating fixed-point types. */ | |
350 | ||
2b725155 | 351 | static tree |
325217ed | 352 | type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp) |
2b725155 RH |
353 | { |
354 | enum machine_mode best_mode = VOIDmode, mode; | |
355 | int best_nunits = 0; | |
356 | ||
3d8bf70f | 357 | if (SCALAR_FLOAT_MODE_P (inner_mode)) |
2b725155 | 358 | mode = MIN_MODE_VECTOR_FLOAT; |
325217ed CF |
359 | else if (SCALAR_FRACT_MODE_P (inner_mode)) |
360 | mode = MIN_MODE_VECTOR_FRACT; | |
361 | else if (SCALAR_UFRACT_MODE_P (inner_mode)) | |
362 | mode = MIN_MODE_VECTOR_UFRACT; | |
363 | else if (SCALAR_ACCUM_MODE_P (inner_mode)) | |
364 | mode = MIN_MODE_VECTOR_ACCUM; | |
365 | else if (SCALAR_UACCUM_MODE_P (inner_mode)) | |
366 | mode = MIN_MODE_VECTOR_UACCUM; | |
2b725155 RH |
367 | else |
368 | mode = MIN_MODE_VECTOR_INT; | |
369 | ||
370 | for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) | |
371 | if (GET_MODE_INNER (mode) == inner_mode | |
372 | && GET_MODE_NUNITS (mode) > best_nunits | |
166cdb08 | 373 | && optab_handler (op, mode)->insn_code != CODE_FOR_nothing) |
2b725155 RH |
374 | best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); |
375 | ||
376 | if (best_mode == VOIDmode) | |
377 | return NULL_TREE; | |
378 | else | |
325217ed CF |
379 | { |
380 | /* For fixed-point modes, we need to pass satp as the 2nd parameter. */ | |
381 | if (ALL_FIXED_POINT_MODE_P (best_mode)) | |
382 | return lang_hooks.types.type_for_mode (best_mode, satp); | |
383 | ||
384 | return lang_hooks.types.type_for_mode (best_mode, 1); | |
385 | } | |
2b725155 RH |
386 | } |
387 | ||
388 | /* Process one statement. If we identify a vector operation, expand it. */ | |
389 | ||
390 | static void | |
726a989a | 391 | expand_vector_operations_1 (gimple_stmt_iterator *gsi) |
2b725155 | 392 | { |
726a989a RB |
393 | gimple stmt = gsi_stmt (*gsi); |
394 | tree lhs, rhs1, rhs2 = NULL, type, compute_type; | |
2b725155 RH |
395 | enum tree_code code; |
396 | enum machine_mode compute_mode; | |
397 | optab op; | |
726a989a RB |
398 | enum gimple_rhs_class rhs_class; |
399 | tree new_rhs; | |
2b725155 | 400 | |
726a989a RB |
401 | if (gimple_code (stmt) != GIMPLE_ASSIGN) |
402 | return; | |
2b725155 | 403 | |
726a989a RB |
404 | code = gimple_assign_rhs_code (stmt); |
405 | rhs_class = get_gimple_rhs_class (code); | |
2b725155 | 406 | |
726a989a RB |
407 | if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) |
408 | return; | |
2b725155 | 409 | |
726a989a RB |
410 | lhs = gimple_assign_lhs (stmt); |
411 | rhs1 = gimple_assign_rhs1 (stmt); | |
412 | type = gimple_expr_type (stmt); | |
413 | if (rhs_class == GIMPLE_BINARY_RHS) | |
414 | rhs2 = gimple_assign_rhs2 (stmt); | |
2b725155 | 415 | |
2b725155 RH |
416 | if (TREE_CODE (type) != VECTOR_TYPE) |
417 | return; | |
418 | ||
f57d17f1 TM |
419 | if (code == NOP_EXPR |
420 | || code == FLOAT_EXPR | |
421 | || code == FIX_TRUNC_EXPR | |
422 | || code == VIEW_CONVERT_EXPR) | |
2b725155 RH |
423 | return; |
424 | ||
425 | gcc_assert (code != CONVERT_EXPR); | |
9f106823 UB |
426 | |
427 | /* The signedness is determined from input argument. */ | |
428 | if (code == VEC_UNPACK_FLOAT_HI_EXPR | |
429 | || code == VEC_UNPACK_FLOAT_LO_EXPR) | |
726a989a | 430 | type = TREE_TYPE (rhs1); |
9f106823 | 431 | |
71d46ca5 MM |
432 | /* Choose between vector shift/rotate by vector and vector shift/rotate by |
433 | scalar */ | |
726a989a RB |
434 | if (code == LSHIFT_EXPR |
435 | || code == RSHIFT_EXPR | |
436 | || code == LROTATE_EXPR | |
71d46ca5 MM |
437 | || code == RROTATE_EXPR) |
438 | { | |
439 | /* If the 2nd argument is vector, we need a vector/vector shift */ | |
726a989a | 440 | if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2)))) |
71d46ca5 | 441 | op = optab_for_tree_code (code, type, optab_vector); |
71d46ca5 MM |
442 | else |
443 | { | |
444 | /* Try for a vector/scalar shift, and if we don't have one, see if we | |
445 | have a vector/vector shift */ | |
446 | op = optab_for_tree_code (code, type, optab_scalar); | |
447 | if (!op | |
448 | || (op->handlers[(int) TYPE_MODE (type)].insn_code | |
449 | == CODE_FOR_nothing)) | |
450 | op = optab_for_tree_code (code, type, optab_vector); | |
451 | } | |
452 | } | |
453 | else | |
454 | op = optab_for_tree_code (code, type, optab_default); | |
2b725155 | 455 | |
9fc4da9d | 456 | /* For widening/narrowing vector operations, the relevant type is of the |
9f106823 UB |
457 | arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is |
458 | calculated in the same way above. */ | |
89d67cca DN |
459 | if (code == WIDEN_SUM_EXPR |
460 | || code == VEC_WIDEN_MULT_HI_EXPR | |
461 | || code == VEC_WIDEN_MULT_LO_EXPR | |
462 | || code == VEC_UNPACK_HI_EXPR | |
463 | || code == VEC_UNPACK_LO_EXPR | |
8115817b | 464 | || code == VEC_PACK_TRUNC_EXPR |
d9987fb4 UB |
465 | || code == VEC_PACK_SAT_EXPR |
466 | || code == VEC_PACK_FIX_TRUNC_EXPR) | |
726a989a | 467 | type = TREE_TYPE (rhs1); |
20f06221 | 468 | |
2b725155 RH |
469 | /* Optabs will try converting a negation into a subtraction, so |
470 | look for it as well. TODO: negation of floating-point vectors | |
471 | might be turned into an exclusive OR toggling the sign bit. */ | |
472 | if (op == NULL | |
473 | && code == NEGATE_EXPR | |
474 | && INTEGRAL_TYPE_P (TREE_TYPE (type))) | |
71d46ca5 | 475 | op = optab_for_tree_code (MINUS_EXPR, type, optab_default); |
2b725155 RH |
476 | |
477 | /* For very wide vectors, try using a smaller vector mode. */ | |
478 | compute_type = type; | |
479 | if (TYPE_MODE (type) == BLKmode && op) | |
480 | { | |
481 | tree vector_compute_type | |
325217ed CF |
482 | = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op, |
483 | TYPE_SATURATING (TREE_TYPE (type))); | |
2b725155 RH |
484 | if (vector_compute_type != NULL_TREE) |
485 | compute_type = vector_compute_type; | |
486 | } | |
487 | ||
488 | /* If we are breaking a BLKmode vector into smaller pieces, | |
489 | type_for_widest_vector_mode has already looked into the optab, | |
490 | so skip these checks. */ | |
491 | if (compute_type == type) | |
492 | { | |
493 | compute_mode = TYPE_MODE (compute_type); | |
494 | if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT | |
325217ed CF |
495 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT |
496 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FRACT | |
497 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UFRACT | |
498 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_ACCUM | |
499 | || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UACCUM) | |
2b725155 | 500 | && op != NULL |
166cdb08 | 501 | && optab_handler (op, compute_mode)->insn_code != CODE_FOR_nothing) |
2b725155 RH |
502 | return; |
503 | else | |
504 | /* There is no operation in hardware, so fall back to scalars. */ | |
505 | compute_type = TREE_TYPE (type); | |
506 | } | |
507 | ||
a6b46ba2 | 508 | gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR); |
726a989a RB |
509 | new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); |
510 | if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) | |
511 | new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
512 | new_rhs); | |
513 | ||
514 | /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
515 | way to do it is change expand_vector_operation and its callees to | |
516 | return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
517 | gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
2b725155 | 518 | |
726a989a | 519 | gimple_set_modified (gsi_stmt (*gsi), true); |
2b725155 RH |
520 | } |
521 | \f | |
522 | /* Use this to lower vector operations introduced by the vectorizer, | |
523 | if it may need the bit-twiddling tricks implemented in this file. */ | |
524 | ||
525 | static bool | |
526 | gate_expand_vector_operations (void) | |
527 | { | |
528 | return flag_tree_vectorize != 0; | |
529 | } | |
530 | ||
c2924966 | 531 | static unsigned int |
2b725155 RH |
532 | expand_vector_operations (void) |
533 | { | |
726a989a | 534 | gimple_stmt_iterator gsi; |
2b725155 RH |
535 | basic_block bb; |
536 | ||
537 | FOR_EACH_BB (bb) | |
538 | { | |
726a989a | 539 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
2b725155 | 540 | { |
726a989a RB |
541 | expand_vector_operations_1 (&gsi); |
542 | update_stmt_if_modified (gsi_stmt (gsi)); | |
2b725155 RH |
543 | } |
544 | } | |
c2924966 | 545 | return 0; |
2b725155 RH |
546 | } |
547 | ||
8ddbbcae | 548 | struct gimple_opt_pass pass_lower_vector = |
2b725155 | 549 | { |
8ddbbcae JH |
550 | { |
551 | GIMPLE_PASS, | |
2b725155 RH |
552 | "veclower", /* name */ |
553 | 0, /* gate */ | |
554 | expand_vector_operations, /* execute */ | |
555 | NULL, /* sub */ | |
556 | NULL, /* next */ | |
557 | 0, /* static_pass_number */ | |
558 | 0, /* tv_id */ | |
559 | PROP_cfg, /* properties_required */ | |
560 | 0, /* properties_provided */ | |
561 | 0, /* properties_destroyed */ | |
562 | 0, /* todo_flags_start */ | |
563 | TODO_dump_func | TODO_ggc_collect | |
8ddbbcae JH |
564 | | TODO_verify_stmts /* todo_flags_finish */ |
565 | } | |
2b725155 RH |
566 | }; |
567 | ||
8ddbbcae | 568 | struct gimple_opt_pass pass_lower_vector_ssa = |
2b725155 | 569 | { |
8ddbbcae JH |
570 | { |
571 | GIMPLE_PASS, | |
2b725155 RH |
572 | "veclower2", /* name */ |
573 | gate_expand_vector_operations, /* gate */ | |
574 | expand_vector_operations, /* execute */ | |
575 | NULL, /* sub */ | |
576 | NULL, /* next */ | |
577 | 0, /* static_pass_number */ | |
578 | 0, /* tv_id */ | |
579 | PROP_cfg, /* properties_required */ | |
580 | 0, /* properties_provided */ | |
581 | 0, /* properties_destroyed */ | |
582 | 0, /* todo_flags_start */ | |
583 | TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ | |
584 | | TODO_verify_ssa | |
8ddbbcae JH |
585 | | TODO_verify_stmts | TODO_verify_flow |
586 | } | |
2b725155 RH |
587 | }; |
588 | ||
589 | #include "gt-tree-vect-generic.h" |