]>
Commit | Line | Data |
---|---|---|
29e6733c | 1 | ;; VSX patterns. |
23a5b65a | 2 | ;; Copyright (C) 2009-2014 Free Software Foundation, Inc. |
29e6733c MM |
3 | ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com> |
4 | ||
5 | ;; This file is part of GCC. | |
6 | ||
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published | |
9 | ;; by the Free Software Foundation; either version 3, or (at your | |
10 | ;; option) any later version. | |
11 | ||
12 | ;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | ;; License for more details. | |
16 | ||
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
21 | ;; Iterator for both scalar and vector floating point types supported by VSX | |
22 | (define_mode_iterator VSX_B [DF V4SF V2DF]) | |
23 | ||
24 | ;; Iterator for the 2 64-bit vector types | |
25 | (define_mode_iterator VSX_D [V2DF V2DI]) | |
26 | ||
27 | ;; Iterator for the 2 32-bit vector types | |
28 | (define_mode_iterator VSX_W [V4SF V4SI]) | |
29 | ||
688e4919 MM |
30 | ;; Iterator for the DF types |
31 | (define_mode_iterator VSX_DF [V2DF DF]) | |
32 | ||
29e6733c MM |
33 | ;; Iterator for vector floating point types supported by VSX |
34 | (define_mode_iterator VSX_F [V4SF V2DF]) | |
35 | ||
36 | ;; Iterator for logical types supported by VSX | |
a16a872d | 37 | (define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI]) |
29e6733c MM |
38 | |
39 | ;; Iterator for memory move. Handle TImode specially to allow | |
40 | ;; it to use gprs as well as vsx registers. | |
a16a872d | 41 | (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) |
29e6733c | 42 | |
d86e633a MM |
43 | (define_mode_iterator VSX_M2 [V16QI |
44 | V8HI | |
45 | V4SI | |
46 | V2DI | |
47 | V4SF | |
48 | V2DF | |
a16a872d | 49 | V1TI |
d86e633a MM |
50 | (TI "TARGET_VSX_TIMODE")]) |
51 | ||
29e6733c MM |
52 | ;; Map into the appropriate load/store name based on the type |
53 | (define_mode_attr VSm [(V16QI "vw4") | |
54 | (V8HI "vw4") | |
55 | (V4SI "vw4") | |
56 | (V4SF "vw4") | |
57 | (V2DF "vd2") | |
58 | (V2DI "vd2") | |
59 | (DF "d") | |
a16a872d | 60 | (V1TI "vd2") |
c6d5ff83 | 61 | (TI "vd2")]) |
29e6733c MM |
62 | |
63 | ;; Map into the appropriate suffix based on the type | |
64 | (define_mode_attr VSs [(V16QI "sp") | |
65 | (V8HI "sp") | |
66 | (V4SI "sp") | |
67 | (V4SF "sp") | |
68 | (V2DF "dp") | |
69 | (V2DI "dp") | |
70 | (DF "dp") | |
71 | (SF "sp") | |
a16a872d | 72 | (V1TI "dp") |
c6d5ff83 | 73 | (TI "dp")]) |
29e6733c MM |
74 | |
75 | ;; Map the register class used | |
76 | (define_mode_attr VSr [(V16QI "v") | |
77 | (V8HI "v") | |
78 | (V4SI "v") | |
79 | (V4SF "wf") | |
80 | (V2DI "wd") | |
81 | (V2DF "wd") | |
82 | (DF "ws") | |
83 | (SF "d") | |
a16a872d | 84 | (V1TI "v") |
c6d5ff83 | 85 | (TI "wt")]) |
29e6733c MM |
86 | |
87 | ;; Map the register class used for float<->int conversions | |
88 | (define_mode_attr VSr2 [(V2DF "wd") | |
89 | (V4SF "wf") | |
7042fe5e | 90 | (DF "ws")]) |
29e6733c MM |
91 | |
92 | (define_mode_attr VSr3 [(V2DF "wa") | |
93 | (V4SF "wa") | |
7042fe5e | 94 | (DF "ws")]) |
29e6733c MM |
95 | |
96 | ;; Map the register class for sp<->dp float conversions, destination | |
97 | (define_mode_attr VSr4 [(SF "ws") | |
98 | (DF "f") | |
99 | (V2DF "wd") | |
100 | (V4SF "v")]) | |
101 | ||
102 | ;; Map the register class for sp<->dp float conversions, destination | |
103 | (define_mode_attr VSr5 [(SF "ws") | |
104 | (DF "f") | |
105 | (V2DF "v") | |
106 | (V4SF "wd")]) | |
107 | ||
108 | ;; Same size integer type for floating point data | |
109 | (define_mode_attr VSi [(V4SF "v4si") | |
110 | (V2DF "v2di") | |
111 | (DF "di")]) | |
112 | ||
113 | (define_mode_attr VSI [(V4SF "V4SI") | |
114 | (V2DF "V2DI") | |
115 | (DF "DI")]) | |
116 | ||
117 | ;; Word size for same size conversion | |
118 | (define_mode_attr VSc [(V4SF "w") | |
119 | (V2DF "d") | |
120 | (DF "d")]) | |
121 | ||
29e6733c MM |
122 | ;; Map into either s or v, depending on whether this is a scalar or vector |
123 | ;; operation | |
124 | (define_mode_attr VSv [(V16QI "v") | |
125 | (V8HI "v") | |
126 | (V4SI "v") | |
127 | (V4SF "v") | |
128 | (V2DI "v") | |
129 | (V2DF "v") | |
a16a872d | 130 | (V1TI "v") |
29e6733c MM |
131 | (DF "s")]) |
132 | ||
133 | ;; Appropriate type for add ops (and other simple FP ops) | |
4356b75d | 134 | (define_mode_attr VStype_simple [(V2DF "vecdouble") |
29e6733c MM |
135 | (V4SF "vecfloat") |
136 | (DF "fp")]) | |
137 | ||
138 | (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d") | |
139 | (V4SF "fp_addsub_s") | |
140 | (DF "fp_addsub_d")]) | |
141 | ||
142 | ;; Appropriate type for multiply ops | |
4356b75d | 143 | (define_mode_attr VStype_mul [(V2DF "vecdouble") |
29e6733c MM |
144 | (V4SF "vecfloat") |
145 | (DF "dmul")]) | |
146 | ||
147 | (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d") | |
148 | (V4SF "fp_mul_s") | |
149 | (DF "fp_mul_d")]) | |
150 | ||
4356b75d PH |
151 | ;; Appropriate type for divide ops. |
152 | (define_mode_attr VStype_div [(V2DF "vecdiv") | |
153 | (V4SF "vecfdiv") | |
29e6733c MM |
154 | (DF "ddiv")]) |
155 | ||
156 | (define_mode_attr VSfptype_div [(V2DF "fp_div_d") | |
157 | (V4SF "fp_div_s") | |
158 | (DF "fp_div_d")]) | |
159 | ||
160 | ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with | |
161 | ;; the scalar sqrt | |
162 | (define_mode_attr VStype_sqrt [(V2DF "dsqrt") | |
4356b75d PH |
163 | (V4SF "ssqrt") |
164 | (DF "dsqrt")]) | |
29e6733c MM |
165 | |
166 | (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d") | |
167 | (V4SF "fp_sqrt_s") | |
168 | (DF "fp_sqrt_d")]) | |
169 | ||
170 | ;; Iterator and modes for sp<->dp conversions | |
171 | ;; Because scalar SF values are represented internally as double, use the | |
172 | ;; V4SF type to represent this than SF. | |
173 | (define_mode_iterator VSX_SPDP [DF V4SF V2DF]) | |
174 | ||
175 | (define_mode_attr VS_spdp_res [(DF "V4SF") | |
176 | (V4SF "V2DF") | |
177 | (V2DF "V4SF")]) | |
178 | ||
179 | (define_mode_attr VS_spdp_insn [(DF "xscvdpsp") | |
180 | (V4SF "xvcvspdp") | |
181 | (V2DF "xvcvdpsp")]) | |
182 | ||
183 | (define_mode_attr VS_spdp_type [(DF "fp") | |
4356b75d PH |
184 | (V4SF "vecdouble") |
185 | (V2DF "vecdouble")]) | |
29e6733c MM |
186 | |
187 | ;; Map the scalar mode for a vector type | |
a16a872d MM |
188 | (define_mode_attr VS_scalar [(V1TI "TI") |
189 | (V2DF "DF") | |
29e6733c MM |
190 | (V2DI "DI") |
191 | (V4SF "SF") | |
192 | (V4SI "SI") | |
193 | (V8HI "HI") | |
194 | (V16QI "QI")]) | |
5aebfdad RH |
195 | |
196 | ;; Map to a double-sized vector mode | |
197 | (define_mode_attr VS_double [(V4SI "V8SI") | |
198 | (V4SF "V8SF") | |
199 | (V2DI "V4DI") | |
a16a872d MM |
200 | (V2DF "V4DF") |
201 | (V1TI "V2TI")]) | |
5aebfdad | 202 | |
29e6733c | 203 | ;; Constants for creating unspecs |
f3c33d9d MM |
204 | (define_c_enum "unspec" |
205 | [UNSPEC_VSX_CONCAT | |
206 | UNSPEC_VSX_CVDPSXWS | |
207 | UNSPEC_VSX_CVDPUXWS | |
208 | UNSPEC_VSX_CVSPDP | |
0bd62dca MM |
209 | UNSPEC_VSX_CVSPDPN |
210 | UNSPEC_VSX_CVDPSPN | |
f3c33d9d MM |
211 | UNSPEC_VSX_CVSXWDP |
212 | UNSPEC_VSX_CVUXWDP | |
213 | UNSPEC_VSX_CVSXDSP | |
214 | UNSPEC_VSX_CVUXDSP | |
215 | UNSPEC_VSX_CVSPSXDS | |
216 | UNSPEC_VSX_CVSPUXDS | |
217 | UNSPEC_VSX_TDIV | |
218 | UNSPEC_VSX_TSQRT | |
f3c33d9d MM |
219 | UNSPEC_VSX_SET |
220 | UNSPEC_VSX_ROUND_I | |
221 | UNSPEC_VSX_ROUND_IC | |
222 | UNSPEC_VSX_SLDWI | |
bf53d4b8 | 223 | UNSPEC_VSX_XXSPLTW |
f3c33d9d | 224 | ]) |
29e6733c MM |
225 | |
226 | ;; VSX moves | |
0cf68694 BS |
227 | |
228 | ;; The patterns for LE permuted loads and stores come before the general | |
229 | ;; VSX moves so they match first. | |
6e8b7d9c BS |
230 | (define_insn_and_split "*vsx_le_perm_load_<mode>" |
231 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
232 | (match_operand:VSX_D 1 "memory_operand" "Z"))] | |
0cf68694 BS |
233 | "!BYTES_BIG_ENDIAN && TARGET_VSX" |
234 | "#" | |
235 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
236 | [(set (match_dup 2) | |
6e8b7d9c | 237 | (vec_select:<MODE> |
0cf68694 BS |
238 | (match_dup 1) |
239 | (parallel [(const_int 1) (const_int 0)]))) | |
240 | (set (match_dup 0) | |
6e8b7d9c | 241 | (vec_select:<MODE> |
0cf68694 BS |
242 | (match_dup 2) |
243 | (parallel [(const_int 1) (const_int 0)])))] | |
244 | " | |
245 | { | |
246 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) | |
247 | : operands[0]; | |
248 | } | |
249 | " | |
250 | [(set_attr "type" "vecload") | |
251 | (set_attr "length" "8")]) | |
252 | ||
6e8b7d9c BS |
253 | (define_insn_and_split "*vsx_le_perm_load_<mode>" |
254 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") | |
255 | (match_operand:VSX_W 1 "memory_operand" "Z"))] | |
0cf68694 BS |
256 | "!BYTES_BIG_ENDIAN && TARGET_VSX" |
257 | "#" | |
258 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
259 | [(set (match_dup 2) | |
6e8b7d9c | 260 | (vec_select:<MODE> |
0cf68694 BS |
261 | (match_dup 1) |
262 | (parallel [(const_int 2) (const_int 3) | |
263 | (const_int 0) (const_int 1)]))) | |
264 | (set (match_dup 0) | |
6e8b7d9c | 265 | (vec_select:<MODE> |
0cf68694 BS |
266 | (match_dup 2) |
267 | (parallel [(const_int 2) (const_int 3) | |
268 | (const_int 0) (const_int 1)])))] | |
269 | " | |
270 | { | |
271 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) | |
272 | : operands[0]; | |
273 | } | |
274 | " | |
275 | [(set_attr "type" "vecload") | |
276 | (set_attr "length" "8")]) | |
277 | ||
278 | (define_insn_and_split "*vsx_le_perm_load_v8hi" | |
279 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
280 | (match_operand:V8HI 1 "memory_operand" "Z"))] | |
281 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
282 | "#" | |
283 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
284 | [(set (match_dup 2) | |
285 | (vec_select:V8HI | |
286 | (match_dup 1) | |
287 | (parallel [(const_int 4) (const_int 5) | |
288 | (const_int 6) (const_int 7) | |
289 | (const_int 0) (const_int 1) | |
290 | (const_int 2) (const_int 3)]))) | |
291 | (set (match_dup 0) | |
292 | (vec_select:V8HI | |
293 | (match_dup 2) | |
294 | (parallel [(const_int 4) (const_int 5) | |
295 | (const_int 6) (const_int 7) | |
296 | (const_int 0) (const_int 1) | |
297 | (const_int 2) (const_int 3)])))] | |
298 | " | |
299 | { | |
300 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) | |
301 | : operands[0]; | |
302 | } | |
303 | " | |
304 | [(set_attr "type" "vecload") | |
305 | (set_attr "length" "8")]) | |
306 | ||
307 | (define_insn_and_split "*vsx_le_perm_load_v16qi" | |
308 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
309 | (match_operand:V16QI 1 "memory_operand" "Z"))] | |
310 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
311 | "#" | |
312 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
313 | [(set (match_dup 2) | |
314 | (vec_select:V16QI | |
315 | (match_dup 1) | |
316 | (parallel [(const_int 8) (const_int 9) | |
317 | (const_int 10) (const_int 11) | |
318 | (const_int 12) (const_int 13) | |
319 | (const_int 14) (const_int 15) | |
320 | (const_int 0) (const_int 1) | |
321 | (const_int 2) (const_int 3) | |
322 | (const_int 4) (const_int 5) | |
323 | (const_int 6) (const_int 7)]))) | |
324 | (set (match_dup 0) | |
325 | (vec_select:V16QI | |
326 | (match_dup 2) | |
327 | (parallel [(const_int 8) (const_int 9) | |
328 | (const_int 10) (const_int 11) | |
329 | (const_int 12) (const_int 13) | |
330 | (const_int 14) (const_int 15) | |
331 | (const_int 0) (const_int 1) | |
332 | (const_int 2) (const_int 3) | |
333 | (const_int 4) (const_int 5) | |
334 | (const_int 6) (const_int 7)])))] | |
335 | " | |
336 | { | |
337 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) | |
338 | : operands[0]; | |
339 | } | |
340 | " | |
341 | [(set_attr "type" "vecload") | |
342 | (set_attr "length" "8")]) | |
343 | ||
411f1755 | 344 | (define_insn "*vsx_le_perm_store_<mode>" |
6e8b7d9c BS |
345 | [(set (match_operand:VSX_D 0 "memory_operand" "=Z") |
346 | (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] | |
0cf68694 BS |
347 | "!BYTES_BIG_ENDIAN && TARGET_VSX" |
348 | "#" | |
411f1755 BS |
349 | [(set_attr "type" "vecstore") |
350 | (set_attr "length" "12")]) | |
351 | ||
352 | (define_split | |
353 | [(set (match_operand:VSX_D 0 "memory_operand" "") | |
354 | (match_operand:VSX_D 1 "vsx_register_operand" ""))] | |
355 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" | |
0cf68694 | 356 | [(set (match_dup 2) |
6e8b7d9c | 357 | (vec_select:<MODE> |
0cf68694 BS |
358 | (match_dup 1) |
359 | (parallel [(const_int 1) (const_int 0)]))) | |
360 | (set (match_dup 0) | |
6e8b7d9c | 361 | (vec_select:<MODE> |
0cf68694 BS |
362 | (match_dup 2) |
363 | (parallel [(const_int 1) (const_int 0)])))] | |
0cf68694 BS |
364 | { |
365 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) | |
366 | : operands[1]; | |
411f1755 BS |
367 | }) |
368 | ||
369 | ;; The post-reload split requires that we re-permute the source | |
370 | ;; register in case it is still live. | |
371 | (define_split | |
372 | [(set (match_operand:VSX_D 0 "memory_operand" "") | |
373 | (match_operand:VSX_D 1 "vsx_register_operand" ""))] | |
374 | "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" | |
375 | [(set (match_dup 1) | |
376 | (vec_select:<MODE> | |
377 | (match_dup 1) | |
378 | (parallel [(const_int 1) (const_int 0)]))) | |
379 | (set (match_dup 0) | |
380 | (vec_select:<MODE> | |
381 | (match_dup 1) | |
382 | (parallel [(const_int 1) (const_int 0)]))) | |
383 | (set (match_dup 1) | |
384 | (vec_select:<MODE> | |
385 | (match_dup 1) | |
386 | (parallel [(const_int 1) (const_int 0)])))] | |
387 | "") | |
0cf68694 | 388 | |
411f1755 | 389 | (define_insn "*vsx_le_perm_store_<mode>" |
6e8b7d9c BS |
390 | [(set (match_operand:VSX_W 0 "memory_operand" "=Z") |
391 | (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))] | |
0cf68694 BS |
392 | "!BYTES_BIG_ENDIAN && TARGET_VSX" |
393 | "#" | |
411f1755 BS |
394 | [(set_attr "type" "vecstore") |
395 | (set_attr "length" "12")]) | |
396 | ||
397 | (define_split | |
398 | [(set (match_operand:VSX_W 0 "memory_operand" "") | |
399 | (match_operand:VSX_W 1 "vsx_register_operand" ""))] | |
400 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" | |
0cf68694 | 401 | [(set (match_dup 2) |
6e8b7d9c | 402 | (vec_select:<MODE> |
0cf68694 BS |
403 | (match_dup 1) |
404 | (parallel [(const_int 2) (const_int 3) | |
405 | (const_int 0) (const_int 1)]))) | |
406 | (set (match_dup 0) | |
6e8b7d9c | 407 | (vec_select:<MODE> |
0cf68694 BS |
408 | (match_dup 2) |
409 | (parallel [(const_int 2) (const_int 3) | |
410 | (const_int 0) (const_int 1)])))] | |
0cf68694 BS |
411 | { |
412 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) | |
413 | : operands[1]; | |
411f1755 BS |
414 | }) |
415 | ||
416 | ;; The post-reload split requires that we re-permute the source | |
417 | ;; register in case it is still live. | |
418 | (define_split | |
419 | [(set (match_operand:VSX_W 0 "memory_operand" "") | |
420 | (match_operand:VSX_W 1 "vsx_register_operand" ""))] | |
421 | "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" | |
422 | [(set (match_dup 1) | |
423 | (vec_select:<MODE> | |
424 | (match_dup 1) | |
425 | (parallel [(const_int 2) (const_int 3) | |
426 | (const_int 0) (const_int 1)]))) | |
427 | (set (match_dup 0) | |
428 | (vec_select:<MODE> | |
429 | (match_dup 1) | |
430 | (parallel [(const_int 2) (const_int 3) | |
431 | (const_int 0) (const_int 1)]))) | |
432 | (set (match_dup 1) | |
433 | (vec_select:<MODE> | |
434 | (match_dup 1) | |
435 | (parallel [(const_int 2) (const_int 3) | |
436 | (const_int 0) (const_int 1)])))] | |
437 | "") | |
0cf68694 | 438 | |
411f1755 | 439 | (define_insn "*vsx_le_perm_store_v8hi" |
0cf68694 BS |
440 | [(set (match_operand:V8HI 0 "memory_operand" "=Z") |
441 | (match_operand:V8HI 1 "vsx_register_operand" "+wa"))] | |
442 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
443 | "#" | |
411f1755 BS |
444 | [(set_attr "type" "vecstore") |
445 | (set_attr "length" "12")]) | |
446 | ||
447 | (define_split | |
448 | [(set (match_operand:V8HI 0 "memory_operand" "") | |
449 | (match_operand:V8HI 1 "vsx_register_operand" ""))] | |
450 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" | |
0cf68694 BS |
451 | [(set (match_dup 2) |
452 | (vec_select:V8HI | |
453 | (match_dup 1) | |
454 | (parallel [(const_int 4) (const_int 5) | |
455 | (const_int 6) (const_int 7) | |
456 | (const_int 0) (const_int 1) | |
457 | (const_int 2) (const_int 3)]))) | |
458 | (set (match_dup 0) | |
459 | (vec_select:V8HI | |
460 | (match_dup 2) | |
461 | (parallel [(const_int 4) (const_int 5) | |
462 | (const_int 6) (const_int 7) | |
463 | (const_int 0) (const_int 1) | |
464 | (const_int 2) (const_int 3)])))] | |
0cf68694 BS |
465 | { |
466 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) | |
467 | : operands[1]; | |
411f1755 BS |
468 | }) |
469 | ||
470 | ;; The post-reload split requires that we re-permute the source | |
471 | ;; register in case it is still live. | |
472 | (define_split | |
473 | [(set (match_operand:V8HI 0 "memory_operand" "") | |
474 | (match_operand:V8HI 1 "vsx_register_operand" ""))] | |
475 | "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" | |
476 | [(set (match_dup 1) | |
477 | (vec_select:V8HI | |
478 | (match_dup 1) | |
479 | (parallel [(const_int 4) (const_int 5) | |
480 | (const_int 6) (const_int 7) | |
481 | (const_int 0) (const_int 1) | |
482 | (const_int 2) (const_int 3)]))) | |
483 | (set (match_dup 0) | |
484 | (vec_select:V8HI | |
485 | (match_dup 1) | |
486 | (parallel [(const_int 4) (const_int 5) | |
487 | (const_int 6) (const_int 7) | |
488 | (const_int 0) (const_int 1) | |
489 | (const_int 2) (const_int 3)]))) | |
490 | (set (match_dup 1) | |
491 | (vec_select:V8HI | |
492 | (match_dup 1) | |
493 | (parallel [(const_int 4) (const_int 5) | |
494 | (const_int 6) (const_int 7) | |
495 | (const_int 0) (const_int 1) | |
496 | (const_int 2) (const_int 3)])))] | |
497 | "") | |
0cf68694 | 498 | |
411f1755 | 499 | (define_insn "*vsx_le_perm_store_v16qi" |
0cf68694 BS |
500 | [(set (match_operand:V16QI 0 "memory_operand" "=Z") |
501 | (match_operand:V16QI 1 "vsx_register_operand" "+wa"))] | |
502 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
503 | "#" | |
411f1755 BS |
504 | [(set_attr "type" "vecstore") |
505 | (set_attr "length" "12")]) | |
506 | ||
507 | (define_split | |
508 | [(set (match_operand:V16QI 0 "memory_operand" "") | |
509 | (match_operand:V16QI 1 "vsx_register_operand" ""))] | |
510 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed" | |
0cf68694 BS |
511 | [(set (match_dup 2) |
512 | (vec_select:V16QI | |
513 | (match_dup 1) | |
514 | (parallel [(const_int 8) (const_int 9) | |
515 | (const_int 10) (const_int 11) | |
516 | (const_int 12) (const_int 13) | |
517 | (const_int 14) (const_int 15) | |
518 | (const_int 0) (const_int 1) | |
519 | (const_int 2) (const_int 3) | |
520 | (const_int 4) (const_int 5) | |
521 | (const_int 6) (const_int 7)]))) | |
522 | (set (match_dup 0) | |
523 | (vec_select:V16QI | |
524 | (match_dup 2) | |
525 | (parallel [(const_int 8) (const_int 9) | |
526 | (const_int 10) (const_int 11) | |
527 | (const_int 12) (const_int 13) | |
528 | (const_int 14) (const_int 15) | |
529 | (const_int 0) (const_int 1) | |
530 | (const_int 2) (const_int 3) | |
531 | (const_int 4) (const_int 5) | |
532 | (const_int 6) (const_int 7)])))] | |
0cf68694 BS |
533 | { |
534 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) | |
535 | : operands[1]; | |
411f1755 BS |
536 | }) |
537 | ||
538 | ;; The post-reload split requires that we re-permute the source | |
539 | ;; register in case it is still live. | |
540 | (define_split | |
541 | [(set (match_operand:V16QI 0 "memory_operand" "") | |
542 | (match_operand:V16QI 1 "vsx_register_operand" ""))] | |
543 | "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed" | |
544 | [(set (match_dup 1) | |
545 | (vec_select:V16QI | |
546 | (match_dup 1) | |
547 | (parallel [(const_int 8) (const_int 9) | |
548 | (const_int 10) (const_int 11) | |
549 | (const_int 12) (const_int 13) | |
550 | (const_int 14) (const_int 15) | |
551 | (const_int 0) (const_int 1) | |
552 | (const_int 2) (const_int 3) | |
553 | (const_int 4) (const_int 5) | |
554 | (const_int 6) (const_int 7)]))) | |
555 | (set (match_dup 0) | |
556 | (vec_select:V16QI | |
557 | (match_dup 1) | |
558 | (parallel [(const_int 8) (const_int 9) | |
559 | (const_int 10) (const_int 11) | |
560 | (const_int 12) (const_int 13) | |
561 | (const_int 14) (const_int 15) | |
562 | (const_int 0) (const_int 1) | |
563 | (const_int 2) (const_int 3) | |
564 | (const_int 4) (const_int 5) | |
565 | (const_int 6) (const_int 7)]))) | |
566 | (set (match_dup 1) | |
567 | (vec_select:V16QI | |
568 | (match_dup 1) | |
569 | (parallel [(const_int 8) (const_int 9) | |
570 | (const_int 10) (const_int 11) | |
571 | (const_int 12) (const_int 13) | |
572 | (const_int 14) (const_int 15) | |
573 | (const_int 0) (const_int 1) | |
574 | (const_int 2) (const_int 3) | |
575 | (const_int 4) (const_int 5) | |
576 | (const_int 6) (const_int 7)])))] | |
577 | "") | |
0cf68694 BS |
578 | |
579 | ||
29e6733c | 580 | (define_insn "*vsx_mov<mode>" |
19be72ab MM |
581 | [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v") |
582 | (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] | |
29e6733c MM |
583 | "VECTOR_MEM_VSX_P (<MODE>mode) |
584 | && (register_operand (operands[0], <MODE>mode) | |
585 | || register_operand (operands[1], <MODE>mode))" | |
586 | { | |
19be72ab | 587 | return rs6000_output_move_128bit (operands); |
29e6733c | 588 | } |
19be72ab MM |
589 | [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") |
590 | (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")]) | |
29e6733c | 591 | |
c6d5ff83 MM |
592 | ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal |
593 | ;; use of TImode is for unions. However for plain data movement, slightly | |
594 | ;; favor the vector loads | |
595 | (define_insn "*vsx_movti_64bit" | |
19be72ab MM |
596 | [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r") |
597 | (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))] | |
c6d5ff83 | 598 | "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) |
29e6733c MM |
599 | && (register_operand (operands[0], TImode) |
600 | || register_operand (operands[1], TImode))" | |
601 | { | |
19be72ab | 602 | return rs6000_output_move_128bit (operands); |
c6d5ff83 | 603 | } |
19be72ab MM |
604 | [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*") |
605 | (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")]) | |
c6d5ff83 MM |
606 | |
607 | (define_insn "*vsx_movti_32bit" | |
608 | [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r") | |
18a6701e | 609 | (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v,r,r, Q, Y, r,n"))] |
c6d5ff83 MM |
610 | "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode) |
611 | && (register_operand (operands[0], TImode) | |
612 | || register_operand (operands[1], TImode))" | |
613 | { | |
614 | switch (which_alternative) | |
615 | { | |
616 | case 0: | |
617 | return "stxvd2x %x1,%y0"; | |
618 | ||
619 | case 1: | |
620 | return "lxvd2x %x0,%y1"; | |
621 | ||
622 | case 2: | |
623 | return "xxlor %x0,%x1,%x1"; | |
624 | ||
625 | case 3: | |
626 | return "xxlxor %x0,%x0,%x0"; | |
627 | ||
628 | case 4: | |
29e6733c MM |
629 | return output_vec_const_move (operands); |
630 | ||
c6d5ff83 | 631 | case 5: |
29e6733c MM |
632 | return "stvx %1,%y0"; |
633 | ||
c6d5ff83 | 634 | case 6: |
29e6733c MM |
635 | return "lvx %0,%y1"; |
636 | ||
c6d5ff83 MM |
637 | case 7: |
638 | if (TARGET_STRING) | |
639 | return \"stswi %1,%P0,16\"; | |
640 | ||
641 | case 8: | |
642 | return \"#\"; | |
643 | ||
644 | case 9: | |
645 | /* If the address is not used in the output, we can use lsi. Otherwise, | |
646 | fall through to generating four loads. */ | |
647 | if (TARGET_STRING | |
648 | && ! reg_overlap_mentioned_p (operands[0], operands[1])) | |
649 | return \"lswi %0,%P1,16\"; | |
650 | /* ... fall through ... */ | |
651 | ||
652 | case 10: | |
653 | case 11: | |
654 | case 12: | |
655 | return \"#\"; | |
29e6733c MM |
656 | default: |
657 | gcc_unreachable (); | |
658 | } | |
659 | } | |
c6d5ff83 MM |
660 | [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store_ux,store_ux,load_ux,load_ux, *, *") |
661 | (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16") | |
662 | (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING") | |
663 | (const_string "always") | |
664 | (const_string "conditional")))]) | |
29e6733c | 665 | |
c9485473 MM |
666 | ;; Explicit load/store expanders for the builtin functions |
667 | (define_expand "vsx_load_<mode>" | |
668 | [(set (match_operand:VSX_M 0 "vsx_register_operand" "") | |
669 | (match_operand:VSX_M 1 "memory_operand" ""))] | |
670 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
671 | "") | |
672 | ||
673 | (define_expand "vsx_store_<mode>" | |
c6d5ff83 MM |
674 | [(set (match_operand:VSX_M 0 "memory_operand" "") |
675 | (match_operand:VSX_M 1 "vsx_register_operand" ""))] | |
c9485473 MM |
676 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
677 | "") | |
678 | ||
29e6733c | 679 | \f |
0609bdf2 MM |
680 | ;; VSX vector floating point arithmetic instructions. The VSX scalar |
681 | ;; instructions are now combined with the insn for the traditional floating | |
682 | ;; point unit. | |
29e6733c | 683 | (define_insn "*vsx_add<mode>3" |
0609bdf2 MM |
684 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
685 | (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
686 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 687 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 688 | "xvadd<VSs> %x0,%x1,%x2" |
29e6733c MM |
689 | [(set_attr "type" "<VStype_simple>") |
690 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
691 | ||
692 | (define_insn "*vsx_sub<mode>3" | |
0609bdf2 MM |
693 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
694 | (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
695 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 696 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 697 | "xvsub<VSs> %x0,%x1,%x2" |
29e6733c MM |
698 | [(set_attr "type" "<VStype_simple>") |
699 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
700 | ||
701 | (define_insn "*vsx_mul<mode>3" | |
0609bdf2 MM |
702 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
703 | (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
704 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 705 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 MM |
706 | "xvmul<VSs> %x0,%x1,%x2" |
707 | [(set_attr "type" "<VStype_simple>") | |
29e6733c MM |
708 | (set_attr "fp_type" "<VSfptype_mul>")]) |
709 | ||
710 | (define_insn "*vsx_div<mode>3" | |
0609bdf2 MM |
711 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
712 | (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
713 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 714 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 715 | "xvdiv<VSs> %x0,%x1,%x2" |
29e6733c MM |
716 | [(set_attr "type" "<VStype_div>") |
717 | (set_attr "fp_type" "<VSfptype_div>")]) | |
718 | ||
719 | ;; *tdiv* instruction returning the FG flag | |
720 | (define_expand "vsx_tdiv<mode>3_fg" | |
721 | [(set (match_dup 3) | |
722 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "") | |
723 | (match_operand:VSX_B 2 "vsx_register_operand" "")] | |
724 | UNSPEC_VSX_TDIV)) | |
725 | (set (match_operand:SI 0 "gpc_reg_operand" "") | |
726 | (gt:SI (match_dup 3) | |
727 | (const_int 0)))] | |
728 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
729 | { | |
730 | operands[3] = gen_reg_rtx (CCFPmode); | |
731 | }) | |
732 | ||
733 | ;; *tdiv* instruction returning the FE flag | |
734 | (define_expand "vsx_tdiv<mode>3_fe" | |
735 | [(set (match_dup 3) | |
736 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "") | |
737 | (match_operand:VSX_B 2 "vsx_register_operand" "")] | |
738 | UNSPEC_VSX_TDIV)) | |
739 | (set (match_operand:SI 0 "gpc_reg_operand" "") | |
740 | (eq:SI (match_dup 3) | |
741 | (const_int 0)))] | |
742 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
743 | { | |
744 | operands[3] = gen_reg_rtx (CCFPmode); | |
745 | }) | |
746 | ||
747 | (define_insn "*vsx_tdiv<mode>3_internal" | |
748 | [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x") | |
749 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa") | |
750 | (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,wa")] | |
751 | UNSPEC_VSX_TDIV))] | |
752 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
753 | "x<VSv>tdiv<VSs> %0,%x1,%x2" | |
754 | [(set_attr "type" "<VStype_simple>") | |
755 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
756 | ||
757 | (define_insn "vsx_fre<mode>2" | |
0609bdf2 MM |
758 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
759 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] | |
29e6733c MM |
760 | UNSPEC_FRES))] |
761 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
0609bdf2 | 762 | "xvre<VSs> %x0,%x1" |
29e6733c MM |
763 | [(set_attr "type" "<VStype_simple>") |
764 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
765 | ||
766 | (define_insn "*vsx_neg<mode>2" | |
0609bdf2 MM |
767 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
768 | (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 769 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 770 | "xvneg<VSs> %x0,%x1" |
29e6733c MM |
771 | [(set_attr "type" "<VStype_simple>") |
772 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
773 | ||
774 | (define_insn "*vsx_abs<mode>2" | |
0609bdf2 MM |
775 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
776 | (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 777 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 778 | "xvabs<VSs> %x0,%x1" |
29e6733c MM |
779 | [(set_attr "type" "<VStype_simple>") |
780 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
781 | ||
782 | (define_insn "vsx_nabs<mode>2" | |
0609bdf2 MM |
783 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
784 | (neg:VSX_F | |
785 | (abs:VSX_F | |
786 | (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa"))))] | |
29e6733c | 787 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 788 | "xvnabs<VSs> %x0,%x1" |
29e6733c MM |
789 | [(set_attr "type" "<VStype_simple>") |
790 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
791 | ||
792 | (define_insn "vsx_smax<mode>3" | |
0609bdf2 MM |
793 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
794 | (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
795 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 796 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 797 | "xvmax<VSs> %x0,%x1,%x2" |
29e6733c MM |
798 | [(set_attr "type" "<VStype_simple>") |
799 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
800 | ||
801 | (define_insn "*vsx_smin<mode>3" | |
0609bdf2 MM |
802 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
803 | (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
804 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 805 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 806 | "xvmin<VSs> %x0,%x1,%x2" |
29e6733c MM |
807 | [(set_attr "type" "<VStype_simple>") |
808 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
809 | ||
810 | (define_insn "*vsx_sqrt<mode>2" | |
0609bdf2 MM |
811 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
812 | (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 813 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 814 | "xvsqrt<VSs> %x0,%x1" |
29e6733c MM |
815 | [(set_attr "type" "<VStype_sqrt>") |
816 | (set_attr "fp_type" "<VSfptype_sqrt>")]) | |
817 | ||
92902797 | 818 | (define_insn "*vsx_rsqrte<mode>2" |
0609bdf2 MM |
819 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
820 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] | |
92902797 | 821 | UNSPEC_RSQRT))] |
29e6733c | 822 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 823 | "xvrsqrte<VSs> %x0,%x1" |
29e6733c MM |
824 | [(set_attr "type" "<VStype_simple>") |
825 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
826 | ||
827 | ;; *tsqrt* returning the fg flag | |
828 | (define_expand "vsx_tsqrt<mode>2_fg" | |
829 | [(set (match_dup 3) | |
830 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")] | |
831 | UNSPEC_VSX_TSQRT)) | |
832 | (set (match_operand:SI 0 "gpc_reg_operand" "") | |
833 | (gt:SI (match_dup 3) | |
834 | (const_int 0)))] | |
835 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
836 | { | |
837 | operands[3] = gen_reg_rtx (CCFPmode); | |
838 | }) | |
839 | ||
840 | ;; *tsqrt* returning the fe flag | |
841 | (define_expand "vsx_tsqrt<mode>2_fe" | |
842 | [(set (match_dup 3) | |
843 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")] | |
844 | UNSPEC_VSX_TSQRT)) | |
845 | (set (match_operand:SI 0 "gpc_reg_operand" "") | |
846 | (eq:SI (match_dup 3) | |
847 | (const_int 0)))] | |
848 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
849 | { | |
850 | operands[3] = gen_reg_rtx (CCFPmode); | |
851 | }) | |
852 | ||
853 | (define_insn "*vsx_tsqrt<mode>2_internal" | |
854 | [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x") | |
855 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] | |
856 | UNSPEC_VSX_TSQRT))] | |
857 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
858 | "x<VSv>tsqrt<VSs> %0,%x1" | |
859 | [(set_attr "type" "<VStype_simple>") | |
860 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
861 | ||
0609bdf2 MM |
862 | ;; Fused vector multiply/add instructions. Support the classical Altivec |
863 | ;; versions of fma, which allows the target to be a separate register from the | |
864 | ;; 3 inputs. Under VSX, the target must be either the addend or the first | |
865 | ;; multiply. | |
c36193c6 MM |
866 | |
867 | (define_insn "*vsx_fmav4sf4" | |
868 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,ws,?wa,?wa,v") | |
869 | (fma:V4SF | |
870 | (match_operand:V4SF 1 "vsx_register_operand" "%ws,ws,wa,wa,v") | |
871 | (match_operand:V4SF 2 "vsx_register_operand" "ws,0,wa,0,v") | |
872 | (match_operand:V4SF 3 "vsx_register_operand" "0,ws,0,wa,v")))] | |
873 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
874 | "@ | |
875 | xvmaddasp %x0,%x1,%x2 | |
876 | xvmaddmsp %x0,%x1,%x3 | |
877 | xvmaddasp %x0,%x1,%x2 | |
878 | xvmaddmsp %x0,%x1,%x3 | |
879 | vmaddfp %0,%1,%2,%3" | |
880 | [(set_attr "type" "vecfloat")]) | |
881 | ||
882 | (define_insn "*vsx_fmav2df4" | |
883 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=ws,ws,?wa,?wa") | |
884 | (fma:V2DF | |
885 | (match_operand:V2DF 1 "vsx_register_operand" "%ws,ws,wa,wa") | |
886 | (match_operand:V2DF 2 "vsx_register_operand" "ws,0,wa,0") | |
887 | (match_operand:V2DF 3 "vsx_register_operand" "0,ws,0,wa")))] | |
888 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
889 | "@ | |
890 | xvmaddadp %x0,%x1,%x2 | |
891 | xvmaddmdp %x0,%x1,%x3 | |
892 | xvmaddadp %x0,%x1,%x2 | |
893 | xvmaddmdp %x0,%x1,%x3" | |
4356b75d | 894 | [(set_attr "type" "vecdouble")]) |
c36193c6 | 895 | |
d6613781 | 896 | (define_insn "*vsx_fms<mode>4" |
c36193c6 MM |
897 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") |
898 | (fma:VSX_F | |
899 | (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa") | |
900 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0") | |
901 | (neg:VSX_F | |
902 | (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] | |
29e6733c MM |
903 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
904 | "@ | |
0609bdf2 MM |
905 | xvmsuba<VSs> %x0,%x1,%x2 |
906 | xvmsubm<VSs> %x0,%x1,%x3 | |
907 | xvmsuba<VSs> %x0,%x1,%x2 | |
908 | xvmsubm<VSs> %x0,%x1,%x3" | |
4356b75d | 909 | [(set_attr "type" "<VStype_mul>")]) |
c36193c6 | 910 | |
d6613781 | 911 | (define_insn "*vsx_nfma<mode>4" |
c36193c6 MM |
912 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa") |
913 | (neg:VSX_F | |
914 | (fma:VSX_F | |
915 | (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa") | |
916 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,wa,0") | |
917 | (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,wa"))))] | |
29e6733c | 918 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
1b1562a5 | 919 | "@ |
0609bdf2 MM |
920 | xvnmadda<VSs> %x0,%x1,%x2 |
921 | xvnmaddm<VSs> %x0,%x1,%x3 | |
922 | xvnmadda<VSs> %x0,%x1,%x2 | |
923 | xvnmaddm<VSs> %x0,%x1,%x3" | |
1b1562a5 MM |
924 | [(set_attr "type" "<VStype_mul>") |
925 | (set_attr "fp_type" "<VSfptype_mul>")]) | |
29e6733c | 926 | |
c36193c6 MM |
927 | (define_insn "*vsx_nfmsv4sf4" |
928 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") | |
929 | (neg:V4SF | |
930 | (fma:V4SF | |
931 | (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") | |
932 | (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") | |
933 | (neg:V4SF | |
934 | (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))] | |
935 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
936 | "@ | |
937 | xvnmsubasp %x0,%x1,%x2 | |
938 | xvnmsubmsp %x0,%x1,%x3 | |
939 | xvnmsubasp %x0,%x1,%x2 | |
940 | xvnmsubmsp %x0,%x1,%x3 | |
941 | vnmsubfp %0,%1,%2,%3" | |
942 | [(set_attr "type" "vecfloat")]) | |
943 | ||
944 | (define_insn "*vsx_nfmsv2df4" | |
945 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") | |
946 | (neg:V2DF | |
947 | (fma:V2DF | |
948 | (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") | |
949 | (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") | |
950 | (neg:V2DF | |
951 | (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))] | |
952 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
953 | "@ | |
954 | xvnmsubadp %x0,%x1,%x2 | |
955 | xvnmsubmdp %x0,%x1,%x3 | |
956 | xvnmsubadp %x0,%x1,%x2 | |
957 | xvnmsubmdp %x0,%x1,%x3" | |
4356b75d | 958 | [(set_attr "type" "vecdouble")]) |
29e6733c | 959 | |
29e6733c MM |
960 | ;; Vector conditional expressions (no scalar version for these instructions) |
961 | (define_insn "vsx_eq<mode>" | |
962 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") | |
963 | (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
964 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
965 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
966 | "xvcmpeq<VSs> %x0,%x1,%x2" | |
967 | [(set_attr "type" "<VStype_simple>") | |
968 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
969 | ||
970 | (define_insn "vsx_gt<mode>" | |
971 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") | |
972 | (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
973 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
974 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
975 | "xvcmpgt<VSs> %x0,%x1,%x2" | |
976 | [(set_attr "type" "<VStype_simple>") | |
977 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
978 | ||
979 | (define_insn "*vsx_ge<mode>" | |
980 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") | |
981 | (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
982 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")))] | |
983 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
984 | "xvcmpge<VSs> %x0,%x1,%x2" | |
985 | [(set_attr "type" "<VStype_simple>") | |
986 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
987 | ||
29e6733c MM |
988 | ;; Compare vectors producing a vector result and a predicate, setting CR6 to |
989 | ;; indicate a combined status | |
990 | (define_insn "*vsx_eq_<mode>_p" | |
991 | [(set (reg:CC 74) | |
992 | (unspec:CC | |
993 | [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa") | |
994 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))] | |
995 | UNSPEC_PREDICATE)) | |
996 | (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") | |
997 | (eq:VSX_F (match_dup 1) | |
998 | (match_dup 2)))] | |
999 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1000 | "xvcmpeq<VSs>. %x0,%x1,%x2" | |
4356b75d | 1001 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1002 | |
1003 | (define_insn "*vsx_gt_<mode>_p" | |
1004 | [(set (reg:CC 74) | |
1005 | (unspec:CC | |
1006 | [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa") | |
1007 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))] | |
1008 | UNSPEC_PREDICATE)) | |
1009 | (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") | |
1010 | (gt:VSX_F (match_dup 1) | |
1011 | (match_dup 2)))] | |
1012 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1013 | "xvcmpgt<VSs>. %x0,%x1,%x2" | |
4356b75d | 1014 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1015 | |
1016 | (define_insn "*vsx_ge_<mode>_p" | |
1017 | [(set (reg:CC 74) | |
1018 | (unspec:CC | |
1019 | [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?wa") | |
1020 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?wa"))] | |
1021 | UNSPEC_PREDICATE)) | |
1022 | (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") | |
1023 | (ge:VSX_F (match_dup 1) | |
1024 | (match_dup 2)))] | |
1025 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1026 | "xvcmpge<VSs>. %x0,%x1,%x2" | |
4356b75d | 1027 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1028 | |
1029 | ;; Vector select | |
1030 | (define_insn "*vsx_xxsel<mode>" | |
1031 | [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") | |
1032 | (if_then_else:VSX_L | |
1033 | (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") | |
70db9095 | 1034 | (match_operand:VSX_L 4 "zero_constant" "")) |
29e6733c MM |
1035 | (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa") |
1036 | (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))] | |
1037 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
1038 | "xxsel %x0,%x3,%x2,%x1" | |
1039 | [(set_attr "type" "vecperm")]) | |
1040 | ||
1041 | (define_insn "*vsx_xxsel<mode>_uns" | |
1042 | [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") | |
1043 | (if_then_else:VSX_L | |
1044 | (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") | |
70db9095 | 1045 | (match_operand:VSX_L 4 "zero_constant" "")) |
29e6733c MM |
1046 | (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa") |
1047 | (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))] | |
1048 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
1049 | "xxsel %x0,%x3,%x2,%x1" | |
1050 | [(set_attr "type" "vecperm")]) | |
1051 | ||
1052 | ;; Copy sign | |
1053 | (define_insn "vsx_copysign<mode>3" | |
0609bdf2 MM |
1054 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
1055 | (unspec:VSX_F | |
1056 | [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa") | |
1057 | (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,wa")] | |
8119a6a6 | 1058 | UNSPEC_COPYSIGN))] |
29e6733c | 1059 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 1060 | "xvcpsgn<VSs> %x0,%x2,%x1" |
29e6733c MM |
1061 | [(set_attr "type" "<VStype_simple>") |
1062 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1063 | ||
1064 | ;; For the conversions, limit the register class for the integer value to be | |
1065 | ;; the fprs because we don't want to add the altivec registers to movdi/movsi. | |
1066 | ;; For the unsigned tests, there isn't a generic double -> unsigned conversion | |
1067 | ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. | |
28fc3eee | 1068 | ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md. |
29e6733c | 1069 | (define_insn "vsx_float<VSi><mode>2" |
28fc3eee MM |
1070 | [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa") |
1071 | (float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] | |
29e6733c MM |
1072 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
1073 | "x<VSv>cvsx<VSc><VSs> %x0,%x1" | |
1074 | [(set_attr "type" "<VStype_simple>") | |
1075 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1076 | ||
1077 | (define_insn "vsx_floatuns<VSi><mode>2" | |
28fc3eee MM |
1078 | [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?wa") |
1079 | (unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] | |
29e6733c MM |
1080 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
1081 | "x<VSv>cvux<VSc><VSs> %x0,%x1" | |
1082 | [(set_attr "type" "<VStype_simple>") | |
1083 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1084 | ||
1085 | (define_insn "vsx_fix_trunc<mode><VSi>2" | |
28fc3eee MM |
1086 | [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") |
1087 | (fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))] | |
29e6733c MM |
1088 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
1089 | "x<VSv>cv<VSs>sx<VSc>s %x0,%x1" | |
1090 | [(set_attr "type" "<VStype_simple>") | |
1091 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1092 | ||
1093 | (define_insn "vsx_fixuns_trunc<mode><VSi>2" | |
28fc3eee MM |
1094 | [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") |
1095 | (unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,wa")))] | |
29e6733c MM |
1096 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
1097 | "x<VSv>cv<VSs>ux<VSc>s %x0,%x1" | |
1098 | [(set_attr "type" "<VStype_simple>") | |
1099 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1100 | ||
1101 | ;; Math rounding functions | |
1102 | (define_insn "vsx_x<VSv>r<VSs>i" | |
1103 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") | |
1104 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] | |
1105 | UNSPEC_VSX_ROUND_I))] | |
1106 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1107 | "x<VSv>r<VSs>i %x0,%x1" | |
1108 | [(set_attr "type" "<VStype_simple>") | |
1109 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1110 | ||
1111 | (define_insn "vsx_x<VSv>r<VSs>ic" | |
1112 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") | |
1113 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] | |
1114 | UNSPEC_VSX_ROUND_IC))] | |
1115 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1116 | "x<VSv>r<VSs>ic %x0,%x1" | |
1117 | [(set_attr "type" "<VStype_simple>") | |
1118 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1119 | ||
1120 | (define_insn "vsx_btrunc<mode>2" | |
0609bdf2 MM |
1121 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
1122 | (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")))] | |
29e6733c | 1123 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
0609bdf2 | 1124 | "xvr<VSs>iz %x0,%x1" |
29e6733c MM |
1125 | [(set_attr "type" "<VStype_simple>") |
1126 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1127 | ||
1128 | (define_insn "*vsx_b2trunc<mode>2" | |
1129 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa") | |
1130 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")] | |
1131 | UNSPEC_FRIZ))] | |
1132 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1133 | "x<VSv>r<VSs>iz %x0,%x1" | |
1134 | [(set_attr "type" "<VStype_simple>") | |
1135 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1136 | ||
1137 | (define_insn "vsx_floor<mode>2" | |
0609bdf2 MM |
1138 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
1139 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] | |
29e6733c MM |
1140 | UNSPEC_FRIM))] |
1141 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
0609bdf2 | 1142 | "xvr<VSs>im %x0,%x1" |
29e6733c MM |
1143 | [(set_attr "type" "<VStype_simple>") |
1144 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1145 | ||
1146 | (define_insn "vsx_ceil<mode>2" | |
0609bdf2 MM |
1147 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa") |
1148 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,wa")] | |
29e6733c MM |
1149 | UNSPEC_FRIP))] |
1150 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
0609bdf2 | 1151 | "xvr<VSs>ip %x0,%x1" |
29e6733c MM |
1152 | [(set_attr "type" "<VStype_simple>") |
1153 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1154 | ||
1155 | \f | |
1156 | ;; VSX convert to/from double vector | |
1157 | ||
1158 | ;; Convert between single and double precision | |
1159 | ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal | |
1160 | ;; scalar single precision instructions internally use the double format. | |
1161 | ;; Prefer the altivec registers, since we likely will need to do a vperm | |
1162 | (define_insn "vsx_<VS_spdp_insn>" | |
1163 | [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?wa") | |
1164 | (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,wa")] | |
1165 | UNSPEC_VSX_CVSPDP))] | |
1166 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1167 | "<VS_spdp_insn> %x0,%x1" | |
1168 | [(set_attr "type" "<VS_spdp_type>")]) | |
1169 | ||
1170 | ;; xscvspdp, represent the scalar SF type as V4SF | |
1171 | (define_insn "vsx_xscvspdp" | |
1172 | [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa") | |
1173 | (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")] | |
1174 | UNSPEC_VSX_CVSPDP))] | |
df5a9a7c | 1175 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
29e6733c MM |
1176 | "xscvspdp %x0,%x1" |
1177 | [(set_attr "type" "fp")]) | |
1178 | ||
1179 | ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF | |
1180 | ;; format of scalars is actually DF. | |
1181 | (define_insn "vsx_xscvdpsp_scalar" | |
1182 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
1183 | (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] | |
1184 | UNSPEC_VSX_CVSPDP))] | |
df5a9a7c | 1185 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
29e6733c MM |
1186 | "xscvdpsp %x0,%x1" |
1187 | [(set_attr "type" "fp")]) | |
1188 | ||
df10b6d4 MM |
1189 | ;; Same as vsx_xscvspdp, but use SF as the type |
1190 | (define_insn "vsx_xscvspdp_scalar2" | |
1191 | [(set (match_operand:SF 0 "vsx_register_operand" "=f") | |
1192 | (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] | |
1193 | UNSPEC_VSX_CVSPDP))] | |
df5a9a7c | 1194 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
df10b6d4 MM |
1195 | "xscvspdp %x0,%x1" |
1196 | [(set_attr "type" "fp")]) | |
1197 | ||
0bd62dca MM |
1198 | ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs |
1199 | (define_insn "vsx_xscvdpspn" | |
1200 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa") | |
1201 | (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")] | |
1202 | UNSPEC_VSX_CVDPSPN))] | |
1203 | "TARGET_XSCVDPSPN" | |
1204 | "xscvdpspn %x0,%x1" | |
1205 | [(set_attr "type" "fp")]) | |
1206 | ||
1207 | (define_insn "vsx_xscvspdpn" | |
1208 | [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa") | |
1209 | (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")] | |
1210 | UNSPEC_VSX_CVSPDPN))] | |
1211 | "TARGET_XSCVSPDPN" | |
1212 | "xscvspdpn %x0,%x1" | |
1213 | [(set_attr "type" "fp")]) | |
1214 | ||
1215 | (define_insn "vsx_xscvdpspn_scalar" | |
1216 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
1217 | (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")] | |
1218 | UNSPEC_VSX_CVDPSPN))] | |
1219 | "TARGET_XSCVDPSPN" | |
1220 | "xscvdpspn %x0,%x1" | |
1221 | [(set_attr "type" "fp")]) | |
1222 | ||
1223 | ;; Used by direct move to move a SFmode value from GPR to VSX register | |
1224 | (define_insn "vsx_xscvspdpn_directmove" | |
1225 | [(set (match_operand:SF 0 "vsx_register_operand" "=wa") | |
8418cf33 | 1226 | (unspec:SF [(match_operand:DI 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
1227 | UNSPEC_VSX_CVSPDPN))] |
1228 | "TARGET_XSCVSPDPN" | |
1229 | "xscvspdpn %x0,%x1" | |
1230 | [(set_attr "type" "fp")]) | |
1231 | ||
29e6733c MM |
1232 | ;; Convert from 64-bit to 32-bit types |
1233 | ;; Note, favor the Altivec registers since the usual use of these instructions | |
1234 | ;; is in vector converts and we need to use the Altivec vperm instruction. | |
1235 | ||
1236 | (define_insn "vsx_xvcvdpsxws" | |
1237 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") | |
1238 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] | |
1239 | UNSPEC_VSX_CVDPSXWS))] | |
1240 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1241 | "xvcvdpsxws %x0,%x1" | |
4356b75d | 1242 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
1243 | |
1244 | (define_insn "vsx_xvcvdpuxws" | |
1245 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") | |
1246 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] | |
1247 | UNSPEC_VSX_CVDPUXWS))] | |
1248 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1249 | "xvcvdpuxws %x0,%x1" | |
4356b75d | 1250 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
1251 | |
1252 | (define_insn "vsx_xvcvsxdsp" | |
1253 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa") | |
1254 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")] | |
1255 | UNSPEC_VSX_CVSXDSP))] | |
1256 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1257 | "xvcvsxdsp %x0,%x1" | |
1258 | [(set_attr "type" "vecfloat")]) | |
1259 | ||
1260 | (define_insn "vsx_xvcvuxdsp" | |
1261 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa") | |
1262 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")] | |
1263 | UNSPEC_VSX_CVUXDSP))] | |
1264 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1265 | "xvcvuxwdp %x0,%x1" | |
4356b75d | 1266 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
1267 | |
1268 | ;; Convert from 32-bit to 64-bit types | |
1269 | (define_insn "vsx_xvcvsxwdp" | |
1270 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") | |
1271 | (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] | |
1272 | UNSPEC_VSX_CVSXWDP))] | |
1273 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1274 | "xvcvsxwdp %x0,%x1" | |
4356b75d | 1275 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
1276 | |
1277 | (define_insn "vsx_xvcvuxwdp" | |
1278 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") | |
1279 | (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] | |
1280 | UNSPEC_VSX_CVUXWDP))] | |
1281 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1282 | "xvcvuxwdp %x0,%x1" | |
4356b75d | 1283 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
1284 | |
1285 | (define_insn "vsx_xvcvspsxds" | |
1286 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") | |
1287 | (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] | |
1288 | UNSPEC_VSX_CVSPSXDS))] | |
1289 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1290 | "xvcvspsxds %x0,%x1" | |
4356b75d | 1291 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
1292 | |
1293 | (define_insn "vsx_xvcvspuxds" | |
1294 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") | |
1295 | (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] | |
1296 | UNSPEC_VSX_CVSPUXDS))] | |
1297 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
1298 | "xvcvspuxds %x0,%x1" | |
4356b75d | 1299 | [(set_attr "type" "vecdouble")]) |
688e4919 MM |
1300 | |
1301 | ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since | |
1302 | ;; since the xsrdpiz instruction does not truncate the value if the floating | |
1303 | ;; point value is < LONG_MIN or > LONG_MAX. | |
1304 | (define_insn "*vsx_float_fix_<mode>2" | |
1305 | [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?wa") | |
1306 | (float:VSX_DF | |
1307 | (fix:<VSI> | |
1308 | (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?wa"))))] | |
1309 | "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT | |
1310 | && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations | |
1311 | && !flag_trapping_math && TARGET_FRIZ" | |
1312 | "x<VSv>r<VSs>iz %x0,%x1" | |
1313 | [(set_attr "type" "<VStype_simple>") | |
1314 | (set_attr "fp_type" "<VSfptype_simple>")]) | |
1315 | ||
29e6733c MM |
1316 | \f |
1317 | ;; Permute operations | |
1318 | ||
1319 | ;; Build a V2DF/V2DI vector from two scalars | |
1320 | (define_insn "vsx_concat_<mode>" | |
c6d5ff83 MM |
1321 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?wa") |
1322 | (vec_concat:VSX_D | |
1323 | (match_operand:<VS_scalar> 1 "vsx_register_operand" "ws,wa") | |
1324 | (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")))] | |
29e6733c | 1325 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
de75c876 BS |
1326 | { |
1327 | if (BYTES_BIG_ENDIAN) | |
1328 | return "xxpermdi %x0,%x1,%x2,0"; | |
1329 | else | |
1330 | return "xxpermdi %x0,%x2,%x1,0"; | |
1331 | } | |
29e6733c MM |
1332 | [(set_attr "type" "vecperm")]) |
1333 | ||
1334 | ;; Special purpose concat using xxpermdi to glue two single precision values | |
1335 | ;; together, relying on the fact that internally scalar floats are represented | |
1336 | ;; as doubles. This is used to initialize a V4SF vector with 4 floats | |
1337 | (define_insn "vsx_concat_v2sf" | |
1338 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") | |
1339 | (unspec:V2DF | |
1340 | [(match_operand:SF 1 "vsx_register_operand" "f,f") | |
1341 | (match_operand:SF 2 "vsx_register_operand" "f,f")] | |
1342 | UNSPEC_VSX_CONCAT))] | |
1343 | "VECTOR_MEM_VSX_P (V2DFmode)" | |
de75c876 BS |
1344 | { |
1345 | if (BYTES_BIG_ENDIAN) | |
1346 | return "xxpermdi %x0,%x1,%x2,0"; | |
1347 | else | |
1348 | return "xxpermdi %x0,%x2,%x1,0"; | |
1349 | } | |
29e6733c MM |
1350 | [(set_attr "type" "vecperm")]) |
1351 | ||
0cf68694 BS |
1352 | ;; xxpermdi for little endian loads and stores. We need several of |
1353 | ;; these since the form of the PARALLEL differs by mode. | |
1354 | (define_insn "*vsx_xxpermdi2_le_<mode>" | |
1355 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
1356 | (vec_select:VSX_D | |
1357 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") | |
1358 | (parallel [(const_int 1) (const_int 0)])))] | |
1359 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
1360 | "xxpermdi %x0,%x1,%x1,2" | |
1361 | [(set_attr "type" "vecperm")]) | |
1362 | ||
1363 | (define_insn "*vsx_xxpermdi4_le_<mode>" | |
1364 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") | |
1365 | (vec_select:VSX_W | |
1366 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") | |
1367 | (parallel [(const_int 2) (const_int 3) | |
1368 | (const_int 0) (const_int 1)])))] | |
1369 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
1370 | "xxpermdi %x0,%x1,%x1,2" | |
1371 | [(set_attr "type" "vecperm")]) | |
1372 | ||
1373 | (define_insn "*vsx_xxpermdi8_le_V8HI" | |
1374 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
1375 | (vec_select:V8HI | |
1376 | (match_operand:V8HI 1 "vsx_register_operand" "wa") | |
1377 | (parallel [(const_int 4) (const_int 5) | |
1378 | (const_int 6) (const_int 7) | |
1379 | (const_int 0) (const_int 1) | |
1380 | (const_int 2) (const_int 3)])))] | |
1381 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)" | |
1382 | "xxpermdi %x0,%x1,%x1,2" | |
1383 | [(set_attr "type" "vecperm")]) | |
1384 | ||
1385 | (define_insn "*vsx_xxpermdi16_le_V16QI" | |
1386 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
1387 | (vec_select:V16QI | |
1388 | (match_operand:V16QI 1 "vsx_register_operand" "wa") | |
1389 | (parallel [(const_int 8) (const_int 9) | |
1390 | (const_int 10) (const_int 11) | |
1391 | (const_int 12) (const_int 13) | |
1392 | (const_int 14) (const_int 15) | |
1393 | (const_int 0) (const_int 1) | |
1394 | (const_int 2) (const_int 3) | |
1395 | (const_int 4) (const_int 5) | |
1396 | (const_int 6) (const_int 7)])))] | |
1397 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)" | |
1398 | "xxpermdi %x0,%x1,%x1,2" | |
1399 | [(set_attr "type" "vecperm")]) | |
1400 | ||
1401 | ;; lxvd2x for little endian loads. We need several of | |
1402 | ;; these since the form of the PARALLEL differs by mode. | |
1403 | (define_insn "*vsx_lxvd2x2_le_<mode>" | |
1404 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
1405 | (vec_select:VSX_D | |
1406 | (match_operand:VSX_D 1 "memory_operand" "Z") | |
1407 | (parallel [(const_int 1) (const_int 0)])))] | |
1408 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
1409 | "lxvd2x %x0,%y1" | |
1410 | [(set_attr "type" "vecload")]) | |
1411 | ||
1412 | (define_insn "*vsx_lxvd2x4_le_<mode>" | |
1413 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") | |
1414 | (vec_select:VSX_W | |
1415 | (match_operand:VSX_W 1 "memory_operand" "Z") | |
1416 | (parallel [(const_int 2) (const_int 3) | |
1417 | (const_int 0) (const_int 1)])))] | |
1418 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
1419 | "lxvd2x %x0,%y1" | |
1420 | [(set_attr "type" "vecload")]) | |
1421 | ||
1422 | (define_insn "*vsx_lxvd2x8_le_V8HI" | |
1423 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
1424 | (vec_select:V8HI | |
1425 | (match_operand:V8HI 1 "memory_operand" "Z") | |
1426 | (parallel [(const_int 4) (const_int 5) | |
1427 | (const_int 6) (const_int 7) | |
1428 | (const_int 0) (const_int 1) | |
1429 | (const_int 2) (const_int 3)])))] | |
1430 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)" | |
1431 | "lxvd2x %x0,%y1" | |
1432 | [(set_attr "type" "vecload")]) | |
1433 | ||
1434 | (define_insn "*vsx_lxvd2x16_le_V16QI" | |
1435 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
1436 | (vec_select:V16QI | |
1437 | (match_operand:V16QI 1 "memory_operand" "Z") | |
1438 | (parallel [(const_int 8) (const_int 9) | |
1439 | (const_int 10) (const_int 11) | |
1440 | (const_int 12) (const_int 13) | |
1441 | (const_int 14) (const_int 15) | |
1442 | (const_int 0) (const_int 1) | |
1443 | (const_int 2) (const_int 3) | |
1444 | (const_int 4) (const_int 5) | |
1445 | (const_int 6) (const_int 7)])))] | |
1446 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)" | |
1447 | "lxvd2x %x0,%y1" | |
1448 | [(set_attr "type" "vecload")]) | |
1449 | ||
1450 | ;; stxvd2x for little endian stores. We need several of | |
1451 | ;; these since the form of the PARALLEL differs by mode. | |
1452 | (define_insn "*vsx_stxvd2x2_le_<mode>" | |
1453 | [(set (match_operand:VSX_D 0 "memory_operand" "=Z") | |
1454 | (vec_select:VSX_D | |
1455 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") | |
1456 | (parallel [(const_int 1) (const_int 0)])))] | |
1457 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
1458 | "stxvd2x %x1,%y0" | |
1459 | [(set_attr "type" "vecstore")]) | |
1460 | ||
1461 | (define_insn "*vsx_stxvd2x4_le_<mode>" | |
1462 | [(set (match_operand:VSX_W 0 "memory_operand" "=Z") | |
1463 | (vec_select:VSX_W | |
1464 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") | |
1465 | (parallel [(const_int 2) (const_int 3) | |
1466 | (const_int 0) (const_int 1)])))] | |
1467 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
1468 | "stxvd2x %x1,%y0" | |
1469 | [(set_attr "type" "vecstore")]) | |
1470 | ||
;; stxvd2x for little endian V8HI stores: swap the two doubleword
;; halves (groups of four halfwords) back into big-endian memory order.
(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])
1482 | ||
;; stxvd2x for little endian V16QI stores: swap the two doubleword
;; halves (groups of eight bytes) back into big-endian memory order.
(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])
1498 | ||
a16a872d MM |
;; Convert a TImode value into V1TImode.  V1TI has a single element,
;; so "setting" element 0 just reinterprets the TImode value
;; (operands[2]) as a V1TI vector; the old vector (operands[1]) is
;; entirely replaced.
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand" "")
   (match_operand:V1TI 1 "nonimmediate_operand" "")
   (match_operand:TI 2 "input_operand" "")
   (match_operand:QI 3 "u5bit_cint_operand" "")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  /* The only valid element index for a one-element vector is 0.  */
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  /* Move the new TImode value, not the old vector: using operands[1]
     here would ignore the value argument and make the set a no-op.  */
  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
  DONE;
})
1513 | ||
29e6733c MM |
;; Set one element of a V2DF/V2DI vector, using xxpermdi to merge the
;; scalar (operand 2) into the vector (operand 1) at index operand 3.
;; The xxpermdi selector depends on which doubleword holds element 0,
;; which differs between big and little endian.
(define_insn "vsx_set_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa")
        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wd,wa")
                       (match_operand:<VS_scalar> 2 "vsx_register_operand" "ws,wa")
                       (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
                      UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
  if (INTVAL (operands[3]) == idx_first)
    return "xxpermdi %x0,%x2,%x1,1";
  else if (INTVAL (operands[3]) == 1 - idx_first)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])
1532 | ||
;; Extract a DF/DI element from V2DF/V2DI.  This is a plain expander;
;; one of the *vsx_extract_<mode>_* insns below matches the result.
(define_expand "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "")
        (vec_select:<VS_scalar> (match_operand:VSX_D 1 "register_operand" "")
                                (parallel
                                 [(match_operand:QI 2 "u5bit_cint_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "")
1541 | ||
;; Optimize cases where we can do a simple or direct move, or avoid
;; the move entirely.  Requires the element already in the scalar slot
;; (wD constraint); with direct moves we can also target a GPR.
(define_insn "*vsx_extract_<mode>_internal1"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,ws,?wa,r")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "register_operand" "d,wd,wa,wm")
         (parallel
          [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
{
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);

  /* Source and destination are the same register: nothing to do.  */
  if (op0_regno == op1_regno)
    return "nop";

  /* Moving to a GPR uses the direct-move instruction.  */
  if (INT_REGNO_P (op0_regno))
    return "mfvsrd %0,%x1";

  if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
    return "fmr %0,%1";

  return "xxlor %x0,%x1,%x1";
}
  [(set_attr "type" "fp,vecsimple,vecsimple,mftgpr")
   (set_attr "length" "4")])
1568 | ||
;; General V2DF/V2DI extract into a VSX register.  When the requested
;; element already sits in the scalar slot a register copy (or nothing)
;; suffices; otherwise use xxpermdi to rotate it into place.
(define_insn "*vsx_extract_<mode>_internal2"
  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d,ws,ws,?wa")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "vsx_register_operand" "d,wd,wd,wa")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "wD,wD,i,i")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)
   && (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE
       || INTVAL (operands[2]) != VECTOR_ELEMENT_SCALAR_64BIT)"
{
  int fldDM;
  gcc_assert (UINTVAL (operands[2]) <= 1);

  if (INTVAL (operands[2]) == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      int op0_regno = REGNO (operands[0]);
      int op1_regno = REGNO (operands[1]);

      if (op0_regno == op1_regno)
        return "nop";

      if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
        return "fmr %0,%1";

      return "xxlor %x0,%x1,%x1";
    }

  /* Build the xxpermdi DM field; selectors are defined in big-endian
     terms, so mirror the value for little endian.  */
  fldDM = INTVAL (operands[2]) << 1;
  if (!BYTES_BIG_ENDIAN)
    fldDM = 3 - fldDM;
  operands[3] = GEN_INT (fldDM);
  return "xxpermdi %x0,%x1,%x1,%3";
}
  [(set_attr "type" "fp,vecsimple,vecperm,vecperm")
   (set_attr "length" "4")])
29e6733c | 1603 | |
117f16fb MM |
;; Optimize extracting a single scalar element from memory if the
;; scalar is in the correct location to use a single load (element in
;; the scalar slot, wD constraint).
(define_insn "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,wv,wr")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "memory_operand" "m,Z,m")
         (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   lfd%U1%X1 %0,%1
   lxsd%U1x %x0,%y1
   ld%U1%X1 %0,%1"
  [(set_attr_alternative "type"
      [(if_then_else
         (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
         (const_string "fpload_ux")
         (if_then_else
           (match_test "update_address_mem (operands[1], VOIDmode)")
           (const_string "fpload_u")
           (const_string "fpload")))
       (const_string "fpload")
       (if_then_else
         (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
         (const_string "load_ux")
         (if_then_else
           (match_test "update_address_mem (operands[1], VOIDmode)")
           (const_string "load_u")
           (const_string "load")))])
   (set_attr "length" "4")])
1633 | ||
;; Optimize storing a single scalar element that is in the right
;; location (scalar slot, wD constraint) with a single store.
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,?Z")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "register_operand" "d,wd,wa")
         (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsd%U0x %x1,%y0
   stxsd%U0x %x1,%y0"
  [(set_attr_alternative "type"
      [(if_then_else
         (match_test "update_indexed_address_mem (operands[0], VOIDmode)")
         (const_string "fpstore_ux")
         (if_then_else
           (match_test "update_address_mem (operands[0], VOIDmode)")
           (const_string "fpstore_u")
           (const_string "fpstore")))
       (const_string "fpstore")
       (const_string "fpstore")])
   (set_attr "length" "4")])
27b097f8 | 1657 | |
df10b6d4 MM |
;; Extract a SF element from V4SF.  Element 0 (in register order) only
;; needs the double-precision convert; other elements are first rotated
;; into position with xxsldwi, using the scratch when available.
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
        (vec_select:SF
         (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
   (clobber (match_scratch:V4SF 3 "=X,0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xscvspdp %x0,%x1
   #"
  ""
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  /* Element numbering is big-endian; mirror it for little endian.  */
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
        op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}"
  [(set_attr "length" "4,8")
   (set_attr "type" "fp")])
1694 | ||
5aebfdad RH |
;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand" "")
   (match_operand:VSX_L 1 "vsx_register_operand" "")
   (match_operand:VSX_L 2 "vsx_register_operand" "")
   (match_operand:QI 3 "u5bit_cint_operand" "")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  /* Only V2DF/V2DI generators exist; view other modes as V2DI.  */
  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
        {
          target = gen_lowpart (V2DImode, target);
          op0 = gen_lowpart (V2DImode, op0);
          op1 = gen_lowpart (V2DImode, op1);
        }
    }
  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})
29e6733c | 1737 | |
5aebfdad | 1738 | (define_insn "vsx_xxpermdi2_<mode>_1" |
29e6733c | 1739 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd") |
5aebfdad RH |
1740 | (vec_select:VSX_D |
1741 | (vec_concat:<VS_double> | |
1742 | (match_operand:VSX_D 1 "vsx_register_operand" "wd") | |
1743 | (match_operand:VSX_D 2 "vsx_register_operand" "wd")) | |
1744 | (parallel [(match_operand 3 "const_0_to_1_operand" "") | |
1745 | (match_operand 4 "const_2_to_3_operand" "")])))] | |
29e6733c MM |
1746 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
1747 | { | |
8adcc78b BS |
1748 | int op3, op4, mask; |
1749 | ||
1750 | /* For little endian, swap operands and invert/swap selectors | |
1751 | to get the correct xxpermdi. The operand swap sets up the | |
1752 | inputs as a little endian array. The selectors are swapped | |
1753 | because they are defined to use big endian ordering. The | |
1754 | selectors are inverted to get the correct doublewords for | |
1755 | little endian ordering. */ | |
1756 | if (BYTES_BIG_ENDIAN) | |
1757 | { | |
1758 | op3 = INTVAL (operands[3]); | |
1759 | op4 = INTVAL (operands[4]); | |
1760 | } | |
1761 | else | |
1762 | { | |
1763 | op3 = 3 - INTVAL (operands[4]); | |
1764 | op4 = 3 - INTVAL (operands[3]); | |
1765 | } | |
1766 | ||
1767 | mask = (op3 << 1) | (op4 - 2); | |
5aebfdad | 1768 | operands[3] = GEN_INT (mask); |
8adcc78b BS |
1769 | |
1770 | if (BYTES_BIG_ENDIAN) | |
1771 | return "xxpermdi %x0,%x1,%x2,%3"; | |
1772 | else | |
1773 | return "xxpermdi %x0,%x2,%x1,%3"; | |
29e6733c MM |
1774 | } |
1775 | [(set_attr "type" "vecperm")]) | |
1776 | ||
5aebfdad RH |
;; Constant vector permute for V2DF/V2DI; defer to the target hook and
;; FAIL if it cannot produce a sequence.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VSX_D 0 "vsx_register_operand" "")
   (match_operand:VSX_D 1 "vsx_register_operand" "")
   (match_operand:VSX_D 2 "vsx_register_operand" "")
   (match_operand:V2DI 3 "" "")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (rs6000_expand_vec_perm_const (operands))
    DONE;
  else
    FAIL;
})
1789 | ||
;; Expanders for builtins

;; Merge the low (odd-numbered, BE terms) doublewords of two vectors.
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
  DONE;
})
5aebfdad RH |
1816 | |
;; Merge the high (even-numbered, BE terms) doublewords of two vectors.
(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
   (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
  DONE;
})
5aebfdad | 1842 | |
29e6733c MM |
;; V2DF/V2DI splat: duplicate a 64-bit scalar into both doublewords,
;; via xxpermdi for register sources or lxvdsx for memory sources.
(define_insn "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa")
        (vec_duplicate:VSX_D
         (match_operand:<VS_scalar> 1 "splat_input_operand" "ws,f,Z,wa,wa,Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   xxpermdi %x0,%x1,%x1,0
   lxvdsx %x0,%y1
   xxpermdi %x0,%x1,%x1,0
   xxpermdi %x0,%x1,%x1,0
   lxvdsx %x0,%y1"
  [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
1857 | ||
;; V4SF/V4SI splat: broadcast one word of the source vector.  The
;; element number is given in big-endian terms, so mirror it for LE.
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
        (vec_duplicate:VSX_W
         (vec_select:<VS_scalar>
          (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
          (parallel
           [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])
1874 | ||
;; Direct (unspec) form of xxspltw: the element number is emitted
;; verbatim, with no endian adjustment.
(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
        (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
                       (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
                      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])
1883 | ||
;; V4SF/V4SI interleave (merge high).  For little endian the same
;; semantics come from xxmrglw with the operands swapped.
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
        (vec_select:VSX_W
         (vec_concat:<VS_double>
          (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
          (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa"))
         (parallel [(const_int 0) (const_int 4)
                    (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])
1901 | ||
;; V4SF/V4SI interleave (merge low).  For little endian the same
;; semantics come from xxmrghw with the operands swapped.
(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa")
        (vec_select:VSX_W
         (vec_concat:<VS_double>
          (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa")
          (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa"))
         (parallel [(const_int 2) (const_int 6)
                    (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])
1918 | ||
;; Shift left double by word immediate (xxsldwi), kept as an unspec
;; since the word-shift semantics have no simple RTL equivalent.
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
        (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
                       (match_operand:VSX_L 2 "vsx_register_operand" "wa")
                       (match_operand:QI 3 "u5bit_cint_operand" "i")]
                      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")])
df10b6d4 MM |
1929 | |
1930 | \f | |
;; Vector reduction insns and splitters

;; V2DF reduction: rotate the doublewords with xxsldwi, then combine
;; with the reduction operation so both elements hold the result.
(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
        (VEC_reduc:V2DF
         (vec_concat:V2DF
          (vec_select:DF
           (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
           (parallel [(const_int 1)]))
          (vec_select:DF
           (match_dup 1)
           (parallel [(const_int 0)])))
         (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
             ? gen_reg_rtx (V2DFmode)
             : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}"
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])
1960 | ||
;; V4SF reduction: two xxsldwi/combine rounds fold all four words into
;; every lane of the result.
(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
        (VEC_reduc:V4SF
         (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
         (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}"
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])
1999 | ||
;; Combiner patterns with the vector reduction patterns that know we
;; can get to the top element of the V2DF array without doing an
;; extract.

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa")
        (vec_select:DF
         (VEC_reduc:V2DF
          (vec_concat:V2DF
           (vec_select:DF
            (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
            (parallel [(const_int 1)]))
           (vec_select:DF
            (match_dup 1)
            (parallel [(const_int 0)])))
          (match_dup 1))
         (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
            ? gen_reg_rtx (DFmode)
            : operands[2];

  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}"
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])
2034 | ||
;; V4SF reduction producing a scalar SF result: run the vector
;; reduction, then convert the final lane with xscvspdp.
(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
        (vec_select:SF
         (VEC_reduc:V4SF
          (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
          (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
         (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 4 "=0,0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
  "
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}"
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])
d86e633a MM |
2079 | |
2080 | \f | |
;; Power8 vector load fusion: the fused li + indexed-load pair must be
;; physically adjacent (addend register first in the address).
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
        (match_operand:P 1 "short_cint_operand" ""))
   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
        (mem:VSX_M2 (plus:P (match_dup 0)
                            (match_operand:P 3 "int_reg_operand" ""))))]
  "TARGET_VSX && TARGET_P8_FUSION"
  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])
2092 | ||
;; Same Power8 vector load fusion, matching the commuted address form
;; (addend register second).
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand" "")
        (match_operand:P 1 "short_cint_operand" ""))
   (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
        (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
                            (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION"
  "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])