]> gcc.gnu.org Git - gcc.git/blob - gcc/config/rs6000/vsx.md
re PR target/72863 (Powerpc64le: redundant swaps when using vec_vsx_ld/st)
[gcc.git] / gcc / config / rs6000 / vsx.md
1 ;; VSX patterns.
2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
4
5 ;; This file is part of GCC.
6
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
11
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
16
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
23
24 ;; Iterator for both scalar and vector floating point types supported by VSX
25 (define_mode_iterator VSX_B [DF V4SF V2DF])
26
27 ;; Iterator for the 2 64-bit vector types
28 (define_mode_iterator VSX_D [V2DF V2DI])
29
30 ;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
31 ;; lxvd2x to properly handle swapping words on little endian
32 (define_mode_iterator VSX_LE [V2DF V2DI V1TI])
33
34 ;; Mode iterator to handle swapping words on little endian for the 128-bit
35 ;; types that goes in a single vector register.
36 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
37 (TF "FLOAT128_VECTOR_P (TFmode)")
38 (TI "TARGET_VSX_TIMODE")])
39
40 ;; Iterator for the 2 32-bit vector types
41 (define_mode_iterator VSX_W [V4SF V4SI])
42
43 ;; Iterator for the DF types
44 (define_mode_iterator VSX_DF [V2DF DF])
45
46 ;; Iterator for vector floating point types supported by VSX
47 (define_mode_iterator VSX_F [V4SF V2DF])
48
49 ;; Iterator for logical types supported by VSX
50 (define_mode_iterator VSX_L [V16QI
51 V8HI
52 V4SI
53 V2DI
54 V4SF
55 V2DF
56 V1TI
57 TI
58 (KF "FLOAT128_VECTOR_P (KFmode)")
59 (TF "FLOAT128_VECTOR_P (TFmode)")])
60
61 ;; Iterator for memory moves.
62 (define_mode_iterator VSX_M [V16QI
63 V8HI
64 V4SI
65 V2DI
66 V4SF
67 V2DF
68 V1TI
69 (KF "FLOAT128_VECTOR_P (KFmode)")
70 (TF "FLOAT128_VECTOR_P (TFmode)")
71 (TI "TARGET_VSX_TIMODE")])
72
73 ;; Map into the appropriate load/store name based on the type
74 (define_mode_attr VSm [(V16QI "vw4")
75 (V8HI "vw4")
76 (V4SI "vw4")
77 (V4SF "vw4")
78 (V2DF "vd2")
79 (V2DI "vd2")
80 (DF "d")
81 (TF "vd2")
82 (KF "vd2")
83 (V1TI "vd2")
84 (TI "vd2")])
85
86 ;; Map into the appropriate suffix based on the type
87 (define_mode_attr VSs [(V16QI "sp")
88 (V8HI "sp")
89 (V4SI "sp")
90 (V4SF "sp")
91 (V2DF "dp")
92 (V2DI "dp")
93 (DF "dp")
94 (SF "sp")
95 (TF "dp")
96 (KF "dp")
97 (V1TI "dp")
98 (TI "dp")])
99
100 ;; Map the register class used
101 (define_mode_attr VSr [(V16QI "v")
102 (V8HI "v")
103 (V4SI "v")
104 (V4SF "wf")
105 (V2DI "wd")
106 (V2DF "wd")
107 (DI "wi")
108 (DF "ws")
109 (SF "ww")
110 (TF "wp")
111 (KF "wq")
112 (V1TI "v")
113 (TI "wt")])
114
115 ;; Map the register class used for float<->int conversions (floating point side)
116 ;; VSr2 is the preferred register class, VSr3 is any register class that will
117 ;; hold the data
118 (define_mode_attr VSr2 [(V2DF "wd")
119 (V4SF "wf")
120 (DF "ws")
121 (SF "ww")
122 (DI "wi")])
123
124 (define_mode_attr VSr3 [(V2DF "wa")
125 (V4SF "wa")
126 (DF "ws")
127 (SF "ww")
128 (DI "wi")])
129
130 ;; Map the register class for sp<->dp float conversions, destination
131 (define_mode_attr VSr4 [(SF "ws")
132 (DF "f")
133 (V2DF "wd")
134 (V4SF "v")])
135
136 ;; Map the register class for sp<->dp float conversions, source
137 (define_mode_attr VSr5 [(SF "ws")
138 (DF "f")
139 (V2DF "v")
140 (V4SF "wd")])
141
142 ;; The VSX register class that a type can occupy, even if it is not the
143 ;; preferred register class (VSr is the preferred register class that will get
144 ;; allocated first).
145 (define_mode_attr VSa [(V16QI "wa")
146 (V8HI "wa")
147 (V4SI "wa")
148 (V4SF "wa")
149 (V2DI "wa")
150 (V2DF "wa")
151 (DI "wi")
152 (DF "ws")
153 (SF "ww")
154 (V1TI "wa")
155 (TI "wt")
156 (TF "wp")
157 (KF "wq")])
158
159 ;; Same size integer type for floating point data
160 (define_mode_attr VSi [(V4SF "v4si")
161 (V2DF "v2di")
162 (DF "di")])
163
164 (define_mode_attr VSI [(V4SF "V4SI")
165 (V2DF "V2DI")
166 (DF "DI")])
167
168 ;; Word size for same size conversion
169 (define_mode_attr VSc [(V4SF "w")
170 (V2DF "d")
171 (DF "d")])
172
173 ;; Map into either s or v, depending on whether this is a scalar or vector
174 ;; operation
175 (define_mode_attr VSv [(V16QI "v")
176 (V8HI "v")
177 (V4SI "v")
178 (V4SF "v")
179 (V2DI "v")
180 (V2DF "v")
181 (V1TI "v")
182 (DF "s")
183 (KF "v")])
184
185 ;; Appropriate type for add ops (and other simple FP ops)
186 (define_mode_attr VStype_simple [(V2DF "vecdouble")
187 (V4SF "vecfloat")
188 (DF "fp")])
189
190 (define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
191 (V4SF "fp_addsub_s")
192 (DF "fp_addsub_d")])
193
194 ;; Appropriate type for multiply ops
195 (define_mode_attr VStype_mul [(V2DF "vecdouble")
196 (V4SF "vecfloat")
197 (DF "dmul")])
198
199 (define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
200 (V4SF "fp_mul_s")
201 (DF "fp_mul_d")])
202
203 ;; Appropriate type for divide ops.
204 (define_mode_attr VStype_div [(V2DF "vecdiv")
205 (V4SF "vecfdiv")
206 (DF "ddiv")])
207
208 (define_mode_attr VSfptype_div [(V2DF "fp_div_d")
209 (V4SF "fp_div_s")
210 (DF "fp_div_d")])
211
212 ;; Appropriate type for sqrt ops. For now, just lump the vector sqrt with
213 ;; the scalar sqrt
214 (define_mode_attr VStype_sqrt [(V2DF "dsqrt")
215 (V4SF "ssqrt")
216 (DF "dsqrt")])
217
218 (define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
219 (V4SF "fp_sqrt_s")
220 (DF "fp_sqrt_d")])
221
222 ;; Iterator and modes for sp<->dp conversions
223 ;; Because scalar SF values are represented internally as double, use the
224 ;; V4SF type to represent this than SF.
225 (define_mode_iterator VSX_SPDP [DF V4SF V2DF])
226
227 (define_mode_attr VS_spdp_res [(DF "V4SF")
228 (V4SF "V2DF")
229 (V2DF "V4SF")])
230
231 (define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
232 (V4SF "xvcvspdp")
233 (V2DF "xvcvdpsp")])
234
235 (define_mode_attr VS_spdp_type [(DF "fp")
236 (V4SF "vecdouble")
237 (V2DF "vecdouble")])
238
239 ;; Map the scalar mode for a vector type
240 (define_mode_attr VS_scalar [(V1TI "TI")
241 (V2DF "DF")
242 (V2DI "DI")
243 (V4SF "SF")
244 (V4SI "SI")
245 (V8HI "HI")
246 (V16QI "QI")])
247
248 ;; Map to a double-sized vector mode
249 (define_mode_attr VS_double [(V4SI "V8SI")
250 (V4SF "V8SF")
251 (V2DI "V4DI")
252 (V2DF "V4DF")
253 (V1TI "V2TI")])
254
255 ;; Map register class for 64-bit element in 128-bit vector for direct moves
256 ;; to/from gprs
257 (define_mode_attr VS_64dm [(V2DF "wk")
258 (V2DI "wj")])
259
260 ;; Map register class for 64-bit element in 128-bit vector for normal register
261 ;; to register moves
262 (define_mode_attr VS_64reg [(V2DF "ws")
263 (V2DI "wi")])
264
265 ;; Iterators for loading constants with xxspltib
266 (define_mode_iterator VSINT_84 [V4SI V2DI DI])
267 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
268
269 ;; Iterator for ISA 3.0 vector extract/insert of integer vectors
270 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
271
272 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
273 ;; insert to validate the operand number.
274 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
275 (V8HI "const_0_to_7_operand")
276 (V4SI "const_0_to_3_operand")])
277
278 ;; Mode attribute to give the constraint for vector extract and insert
279 ;; operations.
280 (define_mode_attr VSX_EX [(V16QI "v")
281 (V8HI "v")
282 (V4SI "wa")])
283
284 ;; Constants for creating unspecs
285 (define_c_enum "unspec"
286 [UNSPEC_VSX_CONCAT
287 UNSPEC_VSX_CVDPSXWS
288 UNSPEC_VSX_CVDPUXWS
289 UNSPEC_VSX_CVSPDP
290 UNSPEC_VSX_CVSPDPN
291 UNSPEC_VSX_CVDPSPN
292 UNSPEC_VSX_CVSXWDP
293 UNSPEC_VSX_CVUXWDP
294 UNSPEC_VSX_CVSXDSP
295 UNSPEC_VSX_CVUXDSP
296 UNSPEC_VSX_CVSPSXDS
297 UNSPEC_VSX_CVSPUXDS
298 UNSPEC_VSX_TDIV
299 UNSPEC_VSX_TSQRT
300 UNSPEC_VSX_SET
301 UNSPEC_VSX_ROUND_I
302 UNSPEC_VSX_ROUND_IC
303 UNSPEC_VSX_SLDWI
304 UNSPEC_VSX_XXSPLTW
305 UNSPEC_VSX_XXSPLTD
306 UNSPEC_VSX_DIVSD
307 UNSPEC_VSX_DIVUD
308 UNSPEC_VSX_MULSD
309 UNSPEC_VSX_XVCVSXDDP
310 UNSPEC_VSX_XVCVUXDDP
311 UNSPEC_VSX_XVCVDPSXDS
312 UNSPEC_VSX_XVCVDPUXDS
313 UNSPEC_VSX_SIGN_EXTEND
314 UNSPEC_P9_MEMORY
315 UNSPEC_VSX_VSLO
316 UNSPEC_VSX_EXTRACT
317 UNSPEC_VSX_SXEXPDP
318 UNSPEC_VSX_SXSIGDP
319 UNSPEC_VSX_SIEXPDP
320 UNSPEC_VSX_SCMPEXPDP
321 UNSPEC_VSX_STSTDC
322 UNSPEC_VSX_VXEXP
323 UNSPEC_VSX_VXSIG
324 UNSPEC_VSX_VIEXP
325 UNSPEC_VSX_VTSTDC
326 ])
327
328 ;; VSX moves
329
330 ;; The patterns for LE permuted loads and stores come before the general
331 ;; VSX moves so they match first.
332 (define_insn_and_split "*vsx_le_perm_load_<mode>"
333 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
334 (match_operand:VSX_LE 1 "memory_operand" "Z"))]
335 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
336 "#"
337 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
338 [(set (match_dup 2)
339 (vec_select:<MODE>
340 (match_dup 1)
341 (parallel [(const_int 1) (const_int 0)])))
342 (set (match_dup 0)
343 (vec_select:<MODE>
344 (match_dup 2)
345 (parallel [(const_int 1) (const_int 0)])))]
346 "
347 {
348 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
349 : operands[0];
350 }
351 "
352 [(set_attr "type" "vecload")
353 (set_attr "length" "8")])
354
355 (define_insn_and_split "*vsx_le_perm_load_<mode>"
356 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
357 (match_operand:VSX_W 1 "memory_operand" "Z"))]
358 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
359 "#"
360 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
361 [(set (match_dup 2)
362 (vec_select:<MODE>
363 (match_dup 1)
364 (parallel [(const_int 2) (const_int 3)
365 (const_int 0) (const_int 1)])))
366 (set (match_dup 0)
367 (vec_select:<MODE>
368 (match_dup 2)
369 (parallel [(const_int 2) (const_int 3)
370 (const_int 0) (const_int 1)])))]
371 "
372 {
373 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
374 : operands[0];
375 }
376 "
377 [(set_attr "type" "vecload")
378 (set_attr "length" "8")])
379
380 (define_insn_and_split "*vsx_le_perm_load_v8hi"
381 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
382 (match_operand:V8HI 1 "memory_operand" "Z"))]
383 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
384 "#"
385 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
386 [(set (match_dup 2)
387 (vec_select:V8HI
388 (match_dup 1)
389 (parallel [(const_int 4) (const_int 5)
390 (const_int 6) (const_int 7)
391 (const_int 0) (const_int 1)
392 (const_int 2) (const_int 3)])))
393 (set (match_dup 0)
394 (vec_select:V8HI
395 (match_dup 2)
396 (parallel [(const_int 4) (const_int 5)
397 (const_int 6) (const_int 7)
398 (const_int 0) (const_int 1)
399 (const_int 2) (const_int 3)])))]
400 "
401 {
402 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
403 : operands[0];
404 }
405 "
406 [(set_attr "type" "vecload")
407 (set_attr "length" "8")])
408
409 (define_insn_and_split "*vsx_le_perm_load_v16qi"
410 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
411 (match_operand:V16QI 1 "memory_operand" "Z"))]
412 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
413 "#"
414 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
415 [(set (match_dup 2)
416 (vec_select:V16QI
417 (match_dup 1)
418 (parallel [(const_int 8) (const_int 9)
419 (const_int 10) (const_int 11)
420 (const_int 12) (const_int 13)
421 (const_int 14) (const_int 15)
422 (const_int 0) (const_int 1)
423 (const_int 2) (const_int 3)
424 (const_int 4) (const_int 5)
425 (const_int 6) (const_int 7)])))
426 (set (match_dup 0)
427 (vec_select:V16QI
428 (match_dup 2)
429 (parallel [(const_int 8) (const_int 9)
430 (const_int 10) (const_int 11)
431 (const_int 12) (const_int 13)
432 (const_int 14) (const_int 15)
433 (const_int 0) (const_int 1)
434 (const_int 2) (const_int 3)
435 (const_int 4) (const_int 5)
436 (const_int 6) (const_int 7)])))]
437 "
438 {
439 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
440 : operands[0];
441 }
442 "
443 [(set_attr "type" "vecload")
444 (set_attr "length" "8")])
445
446 (define_insn "*vsx_le_perm_store_<mode>"
447 [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
448 (match_operand:VSX_LE 1 "vsx_register_operand" "+<VSa>"))]
449 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
450 "#"
451 [(set_attr "type" "vecstore")
452 (set_attr "length" "12")])
453
454 (define_split
455 [(set (match_operand:VSX_LE 0 "memory_operand" "")
456 (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
457 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
458 [(set (match_dup 2)
459 (vec_select:<MODE>
460 (match_dup 1)
461 (parallel [(const_int 1) (const_int 0)])))
462 (set (match_dup 0)
463 (vec_select:<MODE>
464 (match_dup 2)
465 (parallel [(const_int 1) (const_int 0)])))]
466 {
467 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
468 : operands[1];
469 })
470
471 ;; The post-reload split requires that we re-permute the source
472 ;; register in case it is still live.
473 (define_split
474 [(set (match_operand:VSX_LE 0 "memory_operand" "")
475 (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
476 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
477 [(set (match_dup 1)
478 (vec_select:<MODE>
479 (match_dup 1)
480 (parallel [(const_int 1) (const_int 0)])))
481 (set (match_dup 0)
482 (vec_select:<MODE>
483 (match_dup 1)
484 (parallel [(const_int 1) (const_int 0)])))
485 (set (match_dup 1)
486 (vec_select:<MODE>
487 (match_dup 1)
488 (parallel [(const_int 1) (const_int 0)])))]
489 "")
490
491 (define_insn "*vsx_le_perm_store_<mode>"
492 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
493 (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
494 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
495 "#"
496 [(set_attr "type" "vecstore")
497 (set_attr "length" "12")])
498
499 (define_split
500 [(set (match_operand:VSX_W 0 "memory_operand" "")
501 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
502 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
503 [(set (match_dup 2)
504 (vec_select:<MODE>
505 (match_dup 1)
506 (parallel [(const_int 2) (const_int 3)
507 (const_int 0) (const_int 1)])))
508 (set (match_dup 0)
509 (vec_select:<MODE>
510 (match_dup 2)
511 (parallel [(const_int 2) (const_int 3)
512 (const_int 0) (const_int 1)])))]
513 {
514 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
515 : operands[1];
516 })
517
518 ;; The post-reload split requires that we re-permute the source
519 ;; register in case it is still live.
520 (define_split
521 [(set (match_operand:VSX_W 0 "memory_operand" "")
522 (match_operand:VSX_W 1 "vsx_register_operand" ""))]
523 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
524 [(set (match_dup 1)
525 (vec_select:<MODE>
526 (match_dup 1)
527 (parallel [(const_int 2) (const_int 3)
528 (const_int 0) (const_int 1)])))
529 (set (match_dup 0)
530 (vec_select:<MODE>
531 (match_dup 1)
532 (parallel [(const_int 2) (const_int 3)
533 (const_int 0) (const_int 1)])))
534 (set (match_dup 1)
535 (vec_select:<MODE>
536 (match_dup 1)
537 (parallel [(const_int 2) (const_int 3)
538 (const_int 0) (const_int 1)])))]
539 "")
540
541 (define_insn "*vsx_le_perm_store_v8hi"
542 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
543 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
544 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
545 "#"
546 [(set_attr "type" "vecstore")
547 (set_attr "length" "12")])
548
549 (define_split
550 [(set (match_operand:V8HI 0 "memory_operand" "")
551 (match_operand:V8HI 1 "vsx_register_operand" ""))]
552 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
553 [(set (match_dup 2)
554 (vec_select:V8HI
555 (match_dup 1)
556 (parallel [(const_int 4) (const_int 5)
557 (const_int 6) (const_int 7)
558 (const_int 0) (const_int 1)
559 (const_int 2) (const_int 3)])))
560 (set (match_dup 0)
561 (vec_select:V8HI
562 (match_dup 2)
563 (parallel [(const_int 4) (const_int 5)
564 (const_int 6) (const_int 7)
565 (const_int 0) (const_int 1)
566 (const_int 2) (const_int 3)])))]
567 {
568 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
569 : operands[1];
570 })
571
572 ;; The post-reload split requires that we re-permute the source
573 ;; register in case it is still live.
574 (define_split
575 [(set (match_operand:V8HI 0 "memory_operand" "")
576 (match_operand:V8HI 1 "vsx_register_operand" ""))]
577 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
578 [(set (match_dup 1)
579 (vec_select:V8HI
580 (match_dup 1)
581 (parallel [(const_int 4) (const_int 5)
582 (const_int 6) (const_int 7)
583 (const_int 0) (const_int 1)
584 (const_int 2) (const_int 3)])))
585 (set (match_dup 0)
586 (vec_select:V8HI
587 (match_dup 1)
588 (parallel [(const_int 4) (const_int 5)
589 (const_int 6) (const_int 7)
590 (const_int 0) (const_int 1)
591 (const_int 2) (const_int 3)])))
592 (set (match_dup 1)
593 (vec_select:V8HI
594 (match_dup 1)
595 (parallel [(const_int 4) (const_int 5)
596 (const_int 6) (const_int 7)
597 (const_int 0) (const_int 1)
598 (const_int 2) (const_int 3)])))]
599 "")
600
601 (define_insn "*vsx_le_perm_store_v16qi"
602 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
603 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
604 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
605 "#"
606 [(set_attr "type" "vecstore")
607 (set_attr "length" "12")])
608
609 (define_split
610 [(set (match_operand:V16QI 0 "memory_operand" "")
611 (match_operand:V16QI 1 "vsx_register_operand" ""))]
612 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
613 [(set (match_dup 2)
614 (vec_select:V16QI
615 (match_dup 1)
616 (parallel [(const_int 8) (const_int 9)
617 (const_int 10) (const_int 11)
618 (const_int 12) (const_int 13)
619 (const_int 14) (const_int 15)
620 (const_int 0) (const_int 1)
621 (const_int 2) (const_int 3)
622 (const_int 4) (const_int 5)
623 (const_int 6) (const_int 7)])))
624 (set (match_dup 0)
625 (vec_select:V16QI
626 (match_dup 2)
627 (parallel [(const_int 8) (const_int 9)
628 (const_int 10) (const_int 11)
629 (const_int 12) (const_int 13)
630 (const_int 14) (const_int 15)
631 (const_int 0) (const_int 1)
632 (const_int 2) (const_int 3)
633 (const_int 4) (const_int 5)
634 (const_int 6) (const_int 7)])))]
635 {
636 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
637 : operands[1];
638 })
639
640 ;; The post-reload split requires that we re-permute the source
641 ;; register in case it is still live.
642 (define_split
643 [(set (match_operand:V16QI 0 "memory_operand" "")
644 (match_operand:V16QI 1 "vsx_register_operand" ""))]
645 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
646 [(set (match_dup 1)
647 (vec_select:V16QI
648 (match_dup 1)
649 (parallel [(const_int 8) (const_int 9)
650 (const_int 10) (const_int 11)
651 (const_int 12) (const_int 13)
652 (const_int 14) (const_int 15)
653 (const_int 0) (const_int 1)
654 (const_int 2) (const_int 3)
655 (const_int 4) (const_int 5)
656 (const_int 6) (const_int 7)])))
657 (set (match_dup 0)
658 (vec_select:V16QI
659 (match_dup 1)
660 (parallel [(const_int 8) (const_int 9)
661 (const_int 10) (const_int 11)
662 (const_int 12) (const_int 13)
663 (const_int 14) (const_int 15)
664 (const_int 0) (const_int 1)
665 (const_int 2) (const_int 3)
666 (const_int 4) (const_int 5)
667 (const_int 6) (const_int 7)])))
668 (set (match_dup 1)
669 (vec_select:V16QI
670 (match_dup 1)
671 (parallel [(const_int 8) (const_int 9)
672 (const_int 10) (const_int 11)
673 (const_int 12) (const_int 13)
674 (const_int 14) (const_int 15)
675 (const_int 0) (const_int 1)
676 (const_int 2) (const_int 3)
677 (const_int 4) (const_int 5)
678 (const_int 6) (const_int 7)])))]
679 "")
680
681 ;; Little endian word swapping for 128-bit types that are either scalars or the
682 ;; special V1TI container class, which it is not appropriate to use vec_select
683 ;; for the type.
684 (define_insn "*vsx_le_permute_<mode>"
685 [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
686 (rotate:VSX_LE_128
687 (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
688 (const_int 64)))]
689 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
690 "@
691 xxpermdi %x0,%x1,%x1,2
692 lxvd2x %x0,%y1
693 stxvd2x %x1,%y0"
694 [(set_attr "length" "4")
695 (set_attr "type" "vecperm,vecload,vecstore")])
696
697 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
698 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
699 (rotate:VSX_LE_128
700 (rotate:VSX_LE_128
701 (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
702 (const_int 64))
703 (const_int 64)))]
704 "!BYTES_BIG_ENDIAN && TARGET_VSX"
705 "@
706 #
707 xxlor %x0,%x1"
708 ""
709 [(set (match_dup 0) (match_dup 1))]
710 {
711 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
712 {
713 emit_note (NOTE_INSN_DELETED);
714 DONE;
715 }
716 }
717 [(set_attr "length" "0,4")
718 (set_attr "type" "veclogical")])
719
720 (define_insn_and_split "*vsx_le_perm_load_<mode>"
721 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>")
722 (match_operand:VSX_LE_128 1 "memory_operand" "Z"))]
723 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
724 "#"
725 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
726 [(set (match_dup 2)
727 (rotate:VSX_LE_128 (match_dup 1)
728 (const_int 64)))
729 (set (match_dup 0)
730 (rotate:VSX_LE_128 (match_dup 2)
731 (const_int 64)))]
732 "
733 {
734 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
735 : operands[0];
736 }
737 "
738 [(set_attr "type" "vecload")
739 (set_attr "length" "8")])
740
741 (define_insn "*vsx_le_perm_store_<mode>"
742 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z")
743 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>"))]
744 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
745 "#"
746 [(set_attr "type" "vecstore")
747 (set_attr "length" "12")])
748
749 (define_split
750 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
751 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
752 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
753 [(set (match_dup 2)
754 (rotate:VSX_LE_128 (match_dup 1)
755 (const_int 64)))
756 (set (match_dup 0)
757 (rotate:VSX_LE_128 (match_dup 2)
758 (const_int 64)))]
759 {
760 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
761 : operands[0];
762 })
763
764 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
765 ;; VSX registers on a little endian system. The vector types and IEEE 128-bit
766 ;; floating point are handled by the more generic swap elimination pass.
767 (define_peephole2
768 [(set (match_operand:TI 0 "vsx_register_operand" "")
769 (rotate:TI (match_operand:TI 1 "vsx_register_operand" "")
770 (const_int 64)))
771 (set (match_operand:TI 2 "vsx_register_operand" "")
772 (rotate:TI (match_dup 0)
773 (const_int 64)))]
774 "!BYTES_BIG_ENDIAN && TARGET_VSX && TARGET_VSX_TIMODE && !TARGET_P9_VECTOR
775 && (rtx_equal_p (operands[0], operands[2])
776 || peep2_reg_dead_p (2, operands[0]))"
777 [(set (match_dup 2) (match_dup 1))])
778
779 ;; The post-reload split requires that we re-permute the source
780 ;; register in case it is still live.
781 (define_split
782 [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
783 (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
784 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
785 [(set (match_dup 1)
786 (rotate:VSX_LE_128 (match_dup 1)
787 (const_int 64)))
788 (set (match_dup 0)
789 (rotate:VSX_LE_128 (match_dup 1)
790 (const_int 64)))
791 (set (match_dup 1)
792 (rotate:VSX_LE_128 (match_dup 1)
793 (const_int 64)))]
794 "")
795
796 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
797 ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
798 (define_insn "xxspltib_v16qi"
799 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
800 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
801 "TARGET_P9_VECTOR"
802 {
803 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
804 return "xxspltib %x0,%2";
805 }
806 [(set_attr "type" "vecperm")])
807
808 (define_insn "xxspltib_<mode>_nosplit"
809 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
810 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
811 "TARGET_P9_VECTOR"
812 {
813 rtx op1 = operands[1];
814 int value = 256;
815 int num_insns = -1;
816
817 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
818 || num_insns != 1)
819 gcc_unreachable ();
820
821 operands[2] = GEN_INT (value & 0xff);
822 return "xxspltib %x0,%2";
823 }
824 [(set_attr "type" "vecperm")])
825
826 (define_insn_and_split "*xxspltib_<mode>_split"
827 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
828 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
829 "TARGET_P9_VECTOR"
830 "#"
831 "&& 1"
832 [(const_int 0)]
833 {
834 int value = 256;
835 int num_insns = -1;
836 rtx op0 = operands[0];
837 rtx op1 = operands[1];
838 rtx tmp = ((can_create_pseudo_p ())
839 ? gen_reg_rtx (V16QImode)
840 : gen_lowpart (V16QImode, op0));
841
842 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
843 || num_insns != 2)
844 gcc_unreachable ();
845
846 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
847
848 if (<MODE>mode == V2DImode)
849 emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
850
851 else if (<MODE>mode == V4SImode)
852 emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
853
854 else if (<MODE>mode == V8HImode)
855 emit_insn (gen_altivec_vupkhsb (op0, tmp));
856
857 else
858 gcc_unreachable ();
859
860 DONE;
861 }
862 [(set_attr "type" "vecperm")
863 (set_attr "length" "8")])
864
865
866 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
867 ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
868 ;; all 1's, since the machine does not have to wait for the previous
869 ;; instruction using the register being set (such as a store waiting on a slow
870 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
871
872 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
873 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
874 ;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
875 (define_insn "*vsx_mov<mode>_64bit"
876 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
877 "=ZwO, <VSa>, <VSa>, r, we, ?wQ,
878 ?&r, ??r, ??Y, ??r, wo, v,
879 ?<VSa>, *r, v, ??r, wZ, v")
880
881 (match_operand:VSX_M 1 "input_operand"
882 "<VSa>, ZwO, <VSa>, we, r, r,
883 wQ, Y, r, r, wE, jwM,
884 ?jwM, jwM, W, W, v, wZ"))]
885
886 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
887 && (register_operand (operands[0], <MODE>mode)
888 || register_operand (operands[1], <MODE>mode))"
889 {
890 return rs6000_output_move_128bit (operands);
891 }
892 [(set_attr "type"
893 "vecstore, vecload, vecsimple, mffgpr, mftgpr, load,
894 store, load, store, *, vecsimple, vecsimple,
895 vecsimple, *, *, *, vecstore, vecload")
896
897 (set_attr "length"
898 "4, 4, 4, 8, 4, 8,
899 8, 8, 8, 8, 4, 4,
900 4, 8, 20, 20, 4, 4")])
901
902 ;; VSX store VSX load VSX move GPR load GPR store GPR move
903 ;; XXSPLTIB VSPLTISW VSX 0/-1 GPR 0/-1 VMX const GPR const
904 ;; LVX (VMX) STVX (VMX)
905 (define_insn "*vsx_mov<mode>_32bit"
906 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
907 "=ZwO, <VSa>, <VSa>, ??r, ??Y, ??r,
908 wo, v, ?<VSa>, *r, v, ??r,
909 wZ, v")
910
911 (match_operand:VSX_M 1 "input_operand"
912 "<VSa>, ZwO, <VSa>, Y, r, r,
913 wE, jwM, ?jwM, jwM, W, W,
914 v, wZ"))]
915
916 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
917 && (register_operand (operands[0], <MODE>mode)
918 || register_operand (operands[1], <MODE>mode))"
919 {
920 return rs6000_output_move_128bit (operands);
921 }
922 [(set_attr "type"
923 "vecstore, vecload, vecsimple, load, store, *,
924 vecsimple, vecsimple, vecsimple, *, *, *,
925 vecstore, vecload")
926
927 (set_attr "length"
928 "4, 4, 4, 16, 16, 16,
929 4, 4, 4, 16, 20, 32,
930 4, 4")])
931
932 ;; Explicit load/store expanders for the builtin functions
933 (define_expand "vsx_load_<mode>"
934 [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
935 (match_operand:VSX_M 1 "memory_operand" ""))]
936 "VECTOR_MEM_VSX_P (<MODE>mode)"
937 {
938 /* Expand to swaps if needed, prior to swap optimization. */
939 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
940 {
941 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
942 DONE;
943 }
944 })
945
946 (define_expand "vsx_store_<mode>"
947 [(set (match_operand:VSX_M 0 "memory_operand" "")
948 (match_operand:VSX_M 1 "vsx_register_operand" ""))]
949 "VECTOR_MEM_VSX_P (<MODE>mode)"
950 {
951 /* Expand to swaps if needed, prior to swap optimization. */
952 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
953 {
954 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
955 DONE;
956 }
957 })
958
959 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
960 ;; when you really want their element-reversing behavior.
961 (define_insn "vsx_ld_elemrev_v2di"
962 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
963 (vec_select:V2DI
964 (match_operand:V2DI 1 "memory_operand" "Z")
965 (parallel [(const_int 1) (const_int 0)])))]
966 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
967 "lxvd2x %x0,%y1"
968 [(set_attr "type" "vecload")])
969
970 (define_insn "vsx_ld_elemrev_v2df"
971 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
972 (vec_select:V2DF
973 (match_operand:V2DF 1 "memory_operand" "Z")
974 (parallel [(const_int 1) (const_int 0)])))]
975 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
976 "lxvd2x %x0,%y1"
977 [(set_attr "type" "vecload")])
978
979 (define_insn "vsx_ld_elemrev_v4si"
980 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
981 (vec_select:V4SI
982 (match_operand:V4SI 1 "memory_operand" "Z")
983 (parallel [(const_int 3) (const_int 2)
984 (const_int 1) (const_int 0)])))]
985 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
986 "lxvw4x %x0,%y1"
987 [(set_attr "type" "vecload")])
988
989 (define_insn "vsx_ld_elemrev_v4sf"
990 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
991 (vec_select:V4SF
992 (match_operand:V4SF 1 "memory_operand" "Z")
993 (parallel [(const_int 3) (const_int 2)
994 (const_int 1) (const_int 0)])))]
995 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
996 "lxvw4x %x0,%y1"
997 [(set_attr "type" "vecload")])
998
999 (define_insn "vsx_ld_elemrev_v8hi"
1000 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1001 (vec_select:V8HI
1002 (match_operand:V8HI 1 "memory_operand" "Z")
1003 (parallel [(const_int 7) (const_int 6)
1004 (const_int 5) (const_int 4)
1005 (const_int 3) (const_int 2)
1006 (const_int 1) (const_int 0)])))]
1007 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1008 "lxvh8x %x0,%y1"
1009 [(set_attr "type" "vecload")])
1010
1011 (define_insn "vsx_ld_elemrev_v16qi"
1012 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1013 (vec_select:V16QI
1014 (match_operand:V16QI 1 "memory_operand" "Z")
1015 (parallel [(const_int 15) (const_int 14)
1016 (const_int 13) (const_int 12)
1017 (const_int 11) (const_int 10)
1018 (const_int 9) (const_int 8)
1019 (const_int 7) (const_int 6)
1020 (const_int 5) (const_int 4)
1021 (const_int 3) (const_int 2)
1022 (const_int 1) (const_int 0)])))]
1023 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1024 "lxvb16x %x0,%y1"
1025 [(set_attr "type" "vecload")])
1026
1027 (define_insn "vsx_st_elemrev_v2df"
1028 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1029 (vec_select:V2DF
1030 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1031 (parallel [(const_int 1) (const_int 0)])))]
1032 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1033 "stxvd2x %x1,%y0"
1034 [(set_attr "type" "vecstore")])
1035
1036 (define_insn "vsx_st_elemrev_v2di"
1037 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1038 (vec_select:V2DI
1039 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1040 (parallel [(const_int 1) (const_int 0)])))]
1041 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1042 "stxvd2x %x1,%y0"
1043 [(set_attr "type" "vecstore")])
1044
1045 (define_insn "vsx_st_elemrev_v4sf"
1046 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1047 (vec_select:V4SF
1048 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1049 (parallel [(const_int 3) (const_int 2)
1050 (const_int 1) (const_int 0)])))]
1051 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1052 "stxvw4x %x1,%y0"
1053 [(set_attr "type" "vecstore")])
1054
1055 (define_insn "vsx_st_elemrev_v4si"
1056 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1057 (vec_select:V4SI
1058 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1059 (parallel [(const_int 3) (const_int 2)
1060 (const_int 1) (const_int 0)])))]
1061 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1062 "stxvw4x %x1,%y0"
1063 [(set_attr "type" "vecstore")])
1064
1065 (define_insn "vsx_st_elemrev_v8hi"
1066 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1067 (vec_select:V8HI
1068 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1069 (parallel [(const_int 7) (const_int 6)
1070 (const_int 5) (const_int 4)
1071 (const_int 3) (const_int 2)
1072 (const_int 1) (const_int 0)])))]
1073 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1074 "stxvh8x %x1,%y0"
1075 [(set_attr "type" "vecstore")])
1076
1077 (define_insn "vsx_st_elemrev_v16qi"
1078 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1079 (vec_select:V16QI
1080 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1081 (parallel [(const_int 15) (const_int 14)
1082 (const_int 13) (const_int 12)
1083 (const_int 11) (const_int 10)
1084 (const_int 9) (const_int 8)
1085 (const_int 7) (const_int 6)
1086 (const_int 5) (const_int 4)
1087 (const_int 3) (const_int 2)
1088 (const_int 1) (const_int 0)])))]
1089 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1090 "stxvb16x %x1,%y0"
1091 [(set_attr "type" "vecstore")])
1092
1093 \f
1094 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1095 ;; instructions are now combined with the insn for the traditional floating
1096 ;; point unit.
1097 (define_insn "*vsx_add<mode>3"
1098 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1099 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1100 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1101 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1102 "xvadd<VSs> %x0,%x1,%x2"
1103 [(set_attr "type" "<VStype_simple>")
1104 (set_attr "fp_type" "<VSfptype_simple>")])
1105
1106 (define_insn "*vsx_sub<mode>3"
1107 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1108 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1109 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1110 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1111 "xvsub<VSs> %x0,%x1,%x2"
1112 [(set_attr "type" "<VStype_simple>")
1113 (set_attr "fp_type" "<VSfptype_simple>")])
1114
1115 (define_insn "*vsx_mul<mode>3"
1116 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1117 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1118 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1119 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1120 "xvmul<VSs> %x0,%x1,%x2"
1121 [(set_attr "type" "<VStype_simple>")
1122 (set_attr "fp_type" "<VSfptype_mul>")])
1123
1124 ; Emulate vector with scalar for vec_mul in V2DImode
1125 (define_insn_and_split "vsx_mul_v2di"
1126 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1127 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1128 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1129 UNSPEC_VSX_MULSD))]
1130 "VECTOR_MEM_VSX_P (V2DImode)"
1131 "#"
1132 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1133 [(const_int 0)]
1134 "
1135 {
1136 rtx op0 = operands[0];
1137 rtx op1 = operands[1];
1138 rtx op2 = operands[2];
1139 rtx op3 = gen_reg_rtx (DImode);
1140 rtx op4 = gen_reg_rtx (DImode);
1141 rtx op5 = gen_reg_rtx (DImode);
1142 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1143 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1144 emit_insn (gen_muldi3 (op5, op3, op4));
1145 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1146 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1147 emit_insn (gen_muldi3 (op3, op3, op4));
1148 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1149 DONE;
1150 }"
1151 [(set_attr "type" "mul")])
1152
1153 (define_insn "*vsx_div<mode>3"
1154 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1155 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1156 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1157 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1158 "xvdiv<VSs> %x0,%x1,%x2"
1159 [(set_attr "type" "<VStype_div>")
1160 (set_attr "fp_type" "<VSfptype_div>")])
1161
1162 ; Emulate vector with scalar for vec_div in V2DImode
1163 (define_insn_and_split "vsx_div_v2di"
1164 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1165 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1166 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1167 UNSPEC_VSX_DIVSD))]
1168 "VECTOR_MEM_VSX_P (V2DImode)"
1169 "#"
1170 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1171 [(const_int 0)]
1172 "
1173 {
1174 rtx op0 = operands[0];
1175 rtx op1 = operands[1];
1176 rtx op2 = operands[2];
1177 rtx op3 = gen_reg_rtx (DImode);
1178 rtx op4 = gen_reg_rtx (DImode);
1179 rtx op5 = gen_reg_rtx (DImode);
1180 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1181 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1182 emit_insn (gen_divdi3 (op5, op3, op4));
1183 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1184 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1185 emit_insn (gen_divdi3 (op3, op3, op4));
1186 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1187 DONE;
1188 }"
1189 [(set_attr "type" "div")])
1190
1191 (define_insn_and_split "vsx_udiv_v2di"
1192 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1193 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1194 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1195 UNSPEC_VSX_DIVUD))]
1196 "VECTOR_MEM_VSX_P (V2DImode)"
1197 "#"
1198 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
1199 [(const_int 0)]
1200 "
1201 {
1202 rtx op0 = operands[0];
1203 rtx op1 = operands[1];
1204 rtx op2 = operands[2];
1205 rtx op3 = gen_reg_rtx (DImode);
1206 rtx op4 = gen_reg_rtx (DImode);
1207 rtx op5 = gen_reg_rtx (DImode);
1208 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1209 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1210 emit_insn (gen_udivdi3 (op5, op3, op4));
1211 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1212 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1213 emit_insn (gen_udivdi3 (op3, op3, op4));
1214 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1215 DONE;
1216 }"
1217 [(set_attr "type" "div")])
1218
1219 ;; *tdiv* instruction returning the FG flag
1220 (define_expand "vsx_tdiv<mode>3_fg"
1221 [(set (match_dup 3)
1222 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1223 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1224 UNSPEC_VSX_TDIV))
1225 (set (match_operand:SI 0 "gpc_reg_operand" "")
1226 (gt:SI (match_dup 3)
1227 (const_int 0)))]
1228 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1229 {
1230 operands[3] = gen_reg_rtx (CCFPmode);
1231 })
1232
1233 ;; *tdiv* instruction returning the FE flag
1234 (define_expand "vsx_tdiv<mode>3_fe"
1235 [(set (match_dup 3)
1236 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")
1237 (match_operand:VSX_B 2 "vsx_register_operand" "")]
1238 UNSPEC_VSX_TDIV))
1239 (set (match_operand:SI 0 "gpc_reg_operand" "")
1240 (eq:SI (match_dup 3)
1241 (const_int 0)))]
1242 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1243 {
1244 operands[3] = gen_reg_rtx (CCFPmode);
1245 })
1246
1247 (define_insn "*vsx_tdiv<mode>3_internal"
1248 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1249 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
1250 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
1251 UNSPEC_VSX_TDIV))]
1252 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1253 "x<VSv>tdiv<VSs> %0,%x1,%x2"
1254 [(set_attr "type" "<VStype_simple>")
1255 (set_attr "fp_type" "<VSfptype_simple>")])
1256
1257 (define_insn "vsx_fre<mode>2"
1258 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1259 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1260 UNSPEC_FRES))]
1261 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1262 "xvre<VSs> %x0,%x1"
1263 [(set_attr "type" "<VStype_simple>")
1264 (set_attr "fp_type" "<VSfptype_simple>")])
1265
1266 (define_insn "*vsx_neg<mode>2"
1267 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1268 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1269 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1270 "xvneg<VSs> %x0,%x1"
1271 [(set_attr "type" "<VStype_simple>")
1272 (set_attr "fp_type" "<VSfptype_simple>")])
1273
1274 (define_insn "*vsx_abs<mode>2"
1275 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1276 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1277 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1278 "xvabs<VSs> %x0,%x1"
1279 [(set_attr "type" "<VStype_simple>")
1280 (set_attr "fp_type" "<VSfptype_simple>")])
1281
1282 (define_insn "vsx_nabs<mode>2"
1283 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1284 (neg:VSX_F
1285 (abs:VSX_F
1286 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
1287 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1288 "xvnabs<VSs> %x0,%x1"
1289 [(set_attr "type" "<VStype_simple>")
1290 (set_attr "fp_type" "<VSfptype_simple>")])
1291
1292 (define_insn "vsx_smax<mode>3"
1293 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1294 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1295 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1296 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1297 "xvmax<VSs> %x0,%x1,%x2"
1298 [(set_attr "type" "<VStype_simple>")
1299 (set_attr "fp_type" "<VSfptype_simple>")])
1300
1301 (define_insn "*vsx_smin<mode>3"
1302 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1303 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1304 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1305 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1306 "xvmin<VSs> %x0,%x1,%x2"
1307 [(set_attr "type" "<VStype_simple>")
1308 (set_attr "fp_type" "<VSfptype_simple>")])
1309
1310 (define_insn "*vsx_sqrt<mode>2"
1311 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1312 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1313 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1314 "xvsqrt<VSs> %x0,%x1"
1315 [(set_attr "type" "<VStype_sqrt>")
1316 (set_attr "fp_type" "<VSfptype_sqrt>")])
1317
1318 (define_insn "*vsx_rsqrte<mode>2"
1319 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1320 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1321 UNSPEC_RSQRT))]
1322 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1323 "xvrsqrte<VSs> %x0,%x1"
1324 [(set_attr "type" "<VStype_simple>")
1325 (set_attr "fp_type" "<VSfptype_simple>")])
1326
1327 ;; *tsqrt* returning the fg flag
1328 (define_expand "vsx_tsqrt<mode>2_fg"
1329 [(set (match_dup 3)
1330 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1331 UNSPEC_VSX_TSQRT))
1332 (set (match_operand:SI 0 "gpc_reg_operand" "")
1333 (gt:SI (match_dup 3)
1334 (const_int 0)))]
1335 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1336 {
1337 operands[3] = gen_reg_rtx (CCFPmode);
1338 })
1339
1340 ;; *tsqrt* returning the fe flag
1341 (define_expand "vsx_tsqrt<mode>2_fe"
1342 [(set (match_dup 3)
1343 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "")]
1344 UNSPEC_VSX_TSQRT))
1345 (set (match_operand:SI 0 "gpc_reg_operand" "")
1346 (eq:SI (match_dup 3)
1347 (const_int 0)))]
1348 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1349 {
1350 operands[3] = gen_reg_rtx (CCFPmode);
1351 })
1352
1353 (define_insn "*vsx_tsqrt<mode>2_internal"
1354 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
1355 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1356 UNSPEC_VSX_TSQRT))]
1357 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1358 "x<VSv>tsqrt<VSs> %0,%x1"
1359 [(set_attr "type" "<VStype_simple>")
1360 (set_attr "fp_type" "<VSfptype_simple>")])
1361
1362 ;; Fused vector multiply/add instructions. Support the classical Altivec
1363 ;; versions of fma, which allows the target to be a separate register from the
1364 ;; 3 inputs. Under VSX, the target must be either the addend or the first
1365 ;; multiply.
1366
1367 (define_insn "*vsx_fmav4sf4"
1368 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1369 (fma:V4SF
1370 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1371 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1372 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
1373 "VECTOR_UNIT_VSX_P (V4SFmode)"
1374 "@
1375 xvmaddasp %x0,%x1,%x2
1376 xvmaddmsp %x0,%x1,%x3
1377 xvmaddasp %x0,%x1,%x2
1378 xvmaddmsp %x0,%x1,%x3
1379 vmaddfp %0,%1,%2,%3"
1380 [(set_attr "type" "vecfloat")])
1381
1382 (define_insn "*vsx_fmav2df4"
1383 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1384 (fma:V2DF
1385 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1386 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1387 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
1388 "VECTOR_UNIT_VSX_P (V2DFmode)"
1389 "@
1390 xvmaddadp %x0,%x1,%x2
1391 xvmaddmdp %x0,%x1,%x3
1392 xvmaddadp %x0,%x1,%x2
1393 xvmaddmdp %x0,%x1,%x3"
1394 [(set_attr "type" "vecdouble")])
1395
1396 (define_insn "*vsx_fms<mode>4"
1397 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1398 (fma:VSX_F
1399 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
1400 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1401 (neg:VSX_F
1402 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1403 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1404 "@
1405 xvmsuba<VSs> %x0,%x1,%x2
1406 xvmsubm<VSs> %x0,%x1,%x3
1407 xvmsuba<VSs> %x0,%x1,%x2
1408 xvmsubm<VSs> %x0,%x1,%x3"
1409 [(set_attr "type" "<VStype_mul>")])
1410
1411 (define_insn "*vsx_nfma<mode>4"
1412 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
1413 (neg:VSX_F
1414 (fma:VSX_F
1415 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
1416 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
1417 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
1418 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1419 "@
1420 xvnmadda<VSs> %x0,%x1,%x2
1421 xvnmaddm<VSs> %x0,%x1,%x3
1422 xvnmadda<VSs> %x0,%x1,%x2
1423 xvnmaddm<VSs> %x0,%x1,%x3"
1424 [(set_attr "type" "<VStype_mul>")
1425 (set_attr "fp_type" "<VSfptype_mul>")])
1426
1427 (define_insn "*vsx_nfmsv4sf4"
1428 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
1429 (neg:V4SF
1430 (fma:V4SF
1431 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
1432 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
1433 (neg:V4SF
1434 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
1435 "VECTOR_UNIT_VSX_P (V4SFmode)"
1436 "@
1437 xvnmsubasp %x0,%x1,%x2
1438 xvnmsubmsp %x0,%x1,%x3
1439 xvnmsubasp %x0,%x1,%x2
1440 xvnmsubmsp %x0,%x1,%x3
1441 vnmsubfp %0,%1,%2,%3"
1442 [(set_attr "type" "vecfloat")])
1443
1444 (define_insn "*vsx_nfmsv2df4"
1445 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
1446 (neg:V2DF
1447 (fma:V2DF
1448 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
1449 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
1450 (neg:V2DF
1451 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
1452 "VECTOR_UNIT_VSX_P (V2DFmode)"
1453 "@
1454 xvnmsubadp %x0,%x1,%x2
1455 xvnmsubmdp %x0,%x1,%x3
1456 xvnmsubadp %x0,%x1,%x2
1457 xvnmsubmdp %x0,%x1,%x3"
1458 [(set_attr "type" "vecdouble")])
1459
1460 ;; Vector conditional expressions (no scalar version for these instructions)
1461 (define_insn "vsx_eq<mode>"
1462 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1463 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1464 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1465 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1466 "xvcmpeq<VSs> %x0,%x1,%x2"
1467 [(set_attr "type" "<VStype_simple>")
1468 (set_attr "fp_type" "<VSfptype_simple>")])
1469
1470 (define_insn "vsx_gt<mode>"
1471 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1472 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1473 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1474 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1475 "xvcmpgt<VSs> %x0,%x1,%x2"
1476 [(set_attr "type" "<VStype_simple>")
1477 (set_attr "fp_type" "<VSfptype_simple>")])
1478
1479 (define_insn "*vsx_ge<mode>"
1480 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1481 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1482 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
1483 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1484 "xvcmpge<VSs> %x0,%x1,%x2"
1485 [(set_attr "type" "<VStype_simple>")
1486 (set_attr "fp_type" "<VSfptype_simple>")])
1487
1488 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
1489 ;; indicate a combined status
1490 (define_insn "*vsx_eq_<mode>_p"
1491 [(set (reg:CC 74)
1492 (unspec:CC
1493 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1494 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1495 UNSPEC_PREDICATE))
1496 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1497 (eq:VSX_F (match_dup 1)
1498 (match_dup 2)))]
1499 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1500 "xvcmpeq<VSs>. %x0,%x1,%x2"
1501 [(set_attr "type" "<VStype_simple>")])
1502
1503 (define_insn "*vsx_gt_<mode>_p"
1504 [(set (reg:CC 74)
1505 (unspec:CC
1506 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1507 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1508 UNSPEC_PREDICATE))
1509 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1510 (gt:VSX_F (match_dup 1)
1511 (match_dup 2)))]
1512 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1513 "xvcmpgt<VSs>. %x0,%x1,%x2"
1514 [(set_attr "type" "<VStype_simple>")])
1515
1516 (define_insn "*vsx_ge_<mode>_p"
1517 [(set (reg:CC 74)
1518 (unspec:CC
1519 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
1520 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
1521 UNSPEC_PREDICATE))
1522 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1523 (ge:VSX_F (match_dup 1)
1524 (match_dup 2)))]
1525 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1526 "xvcmpge<VSs>. %x0,%x1,%x2"
1527 [(set_attr "type" "<VStype_simple>")])
1528
1529 ;; Vector select
1530 (define_insn "*vsx_xxsel<mode>"
1531 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1532 (if_then_else:VSX_L
1533 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1534 (match_operand:VSX_L 4 "zero_constant" ""))
1535 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1536 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1537 "VECTOR_MEM_VSX_P (<MODE>mode)"
1538 "xxsel %x0,%x3,%x2,%x1"
1539 [(set_attr "type" "vecmove")])
1540
1541 (define_insn "*vsx_xxsel<mode>_uns"
1542 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1543 (if_then_else:VSX_L
1544 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
1545 (match_operand:VSX_L 4 "zero_constant" ""))
1546 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
1547 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
1548 "VECTOR_MEM_VSX_P (<MODE>mode)"
1549 "xxsel %x0,%x3,%x2,%x1"
1550 [(set_attr "type" "vecmove")])
1551
1552 ;; Copy sign
1553 (define_insn "vsx_copysign<mode>3"
1554 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1555 (unspec:VSX_F
1556 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
1557 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
1558 UNSPEC_COPYSIGN))]
1559 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1560 "xvcpsgn<VSs> %x0,%x2,%x1"
1561 [(set_attr "type" "<VStype_simple>")
1562 (set_attr "fp_type" "<VSfptype_simple>")])
1563
1564 ;; For the conversions, limit the register class for the integer value to be
1565 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
1566 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
1567 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
1568 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
1569 ;; in allowing virtual registers.
1570 (define_insn "vsx_float<VSi><mode>2"
1571 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1572 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1573 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1574 "xvcvsx<VSc><VSs> %x0,%x1"
1575 [(set_attr "type" "<VStype_simple>")
1576 (set_attr "fp_type" "<VSfptype_simple>")])
1577
1578 (define_insn "vsx_floatuns<VSi><mode>2"
1579 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
1580 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
1581 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1582 "xvcvux<VSc><VSs> %x0,%x1"
1583 [(set_attr "type" "<VStype_simple>")
1584 (set_attr "fp_type" "<VSfptype_simple>")])
1585
1586 (define_insn "vsx_fix_trunc<mode><VSi>2"
1587 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1588 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1589 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1590 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
1591 [(set_attr "type" "<VStype_simple>")
1592 (set_attr "fp_type" "<VSfptype_simple>")])
1593
1594 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
1595 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
1596 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
1597 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1598 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
1599 [(set_attr "type" "<VStype_simple>")
1600 (set_attr "fp_type" "<VSfptype_simple>")])
1601
1602 ;; Math rounding functions
1603 (define_insn "vsx_x<VSv>r<VSs>i"
1604 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1605 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1606 UNSPEC_VSX_ROUND_I))]
1607 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1608 "x<VSv>r<VSs>i %x0,%x1"
1609 [(set_attr "type" "<VStype_simple>")
1610 (set_attr "fp_type" "<VSfptype_simple>")])
1611
1612 (define_insn "vsx_x<VSv>r<VSs>ic"
1613 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1614 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1615 UNSPEC_VSX_ROUND_IC))]
1616 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1617 "x<VSv>r<VSs>ic %x0,%x1"
1618 [(set_attr "type" "<VStype_simple>")
1619 (set_attr "fp_type" "<VSfptype_simple>")])
1620
1621 (define_insn "vsx_btrunc<mode>2"
1622 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1623 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
1624 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1625 "xvr<VSs>iz %x0,%x1"
1626 [(set_attr "type" "<VStype_simple>")
1627 (set_attr "fp_type" "<VSfptype_simple>")])
1628
1629 (define_insn "*vsx_b2trunc<mode>2"
1630 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1631 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
1632 UNSPEC_FRIZ))]
1633 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1634 "x<VSv>r<VSs>iz %x0,%x1"
1635 [(set_attr "type" "<VStype_simple>")
1636 (set_attr "fp_type" "<VSfptype_simple>")])
1637
1638 (define_insn "vsx_floor<mode>2"
1639 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1640 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1641 UNSPEC_FRIM))]
1642 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1643 "xvr<VSs>im %x0,%x1"
1644 [(set_attr "type" "<VStype_simple>")
1645 (set_attr "fp_type" "<VSfptype_simple>")])
1646
1647 (define_insn "vsx_ceil<mode>2"
1648 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1649 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
1650 UNSPEC_FRIP))]
1651 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1652 "xvr<VSs>ip %x0,%x1"
1653 [(set_attr "type" "<VStype_simple>")
1654 (set_attr "fp_type" "<VSfptype_simple>")])
1655
1656 \f
1657 ;; VSX convert to/from double vector
1658
1659 ;; Convert between single and double precision
1660 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
1661 ;; scalar single precision instructions internally use the double format.
1662 ;; Prefer the altivec registers, since we likely will need to do a vperm
1663 (define_insn "vsx_<VS_spdp_insn>"
1664 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
1665 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
1666 UNSPEC_VSX_CVSPDP))]
1667 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1668 "<VS_spdp_insn> %x0,%x1"
1669 [(set_attr "type" "<VS_spdp_type>")])
1670
1671 ;; xscvspdp, represent the scalar SF type as V4SF
1672 (define_insn "vsx_xscvspdp"
1673 [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
1674 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1675 UNSPEC_VSX_CVSPDP))]
1676 "VECTOR_UNIT_VSX_P (V4SFmode)"
1677 "xscvspdp %x0,%x1"
1678 [(set_attr "type" "fp")])
1679
1680 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
1681 ;; format of scalars is actually DF.
1682 (define_insn "vsx_xscvdpsp_scalar"
1683 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1684 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
1685 UNSPEC_VSX_CVSPDP))]
1686 "VECTOR_UNIT_VSX_P (V4SFmode)"
1687 "xscvdpsp %x0,%x1"
1688 [(set_attr "type" "fp")])
1689
1690 ;; Same as vsx_xscvspdp, but use SF as the type
1691 (define_insn "vsx_xscvspdp_scalar2"
1692 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
1693 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
1694 UNSPEC_VSX_CVSPDP))]
1695 "VECTOR_UNIT_VSX_P (V4SFmode)"
1696 "xscvspdp %x0,%x1"
1697 [(set_attr "type" "fp")])
1698
1699 ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
1700 (define_insn "vsx_xscvdpspn"
1701 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww,?ww")
1702 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
1703 UNSPEC_VSX_CVDPSPN))]
1704 "TARGET_XSCVDPSPN"
1705 "xscvdpspn %x0,%x1"
1706 [(set_attr "type" "fp")])
1707
1708 (define_insn "vsx_xscvspdpn"
1709 [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?ws")
1710 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
1711 UNSPEC_VSX_CVSPDPN))]
1712 "TARGET_XSCVSPDPN"
1713 "xscvspdpn %x0,%x1"
1714 [(set_attr "type" "fp")])
1715
1716 (define_insn "vsx_xscvdpspn_scalar"
1717 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,?wa")
1718 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww,ww")]
1719 UNSPEC_VSX_CVDPSPN))]
1720 "TARGET_XSCVDPSPN"
1721 "xscvdpspn %x0,%x1"
1722 [(set_attr "type" "fp")])
1723
1724 ;; Used by direct move to move a SFmode value from GPR to VSX register
1725 (define_insn "vsx_xscvspdpn_directmove"
1726 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
1727 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
1728 UNSPEC_VSX_CVSPDPN))]
1729 "TARGET_XSCVSPDPN"
1730 "xscvspdpn %x0,%x1"
1731 [(set_attr "type" "fp")])
1732
1733 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
1734
1735 (define_expand "vsx_xvcvsxddp_scale"
1736 [(match_operand:V2DF 0 "vsx_register_operand" "")
1737 (match_operand:V2DI 1 "vsx_register_operand" "")
1738 (match_operand:QI 2 "immediate_operand" "")]
1739 "VECTOR_UNIT_VSX_P (V2DFmode)"
1740 {
1741 rtx op0 = operands[0];
1742 rtx op1 = operands[1];
1743 int scale = INTVAL(operands[2]);
1744 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
1745 if (scale != 0)
1746 rs6000_scale_v2df (op0, op0, -scale);
1747 DONE;
1748 })
1749
1750 (define_insn "vsx_xvcvsxddp"
1751 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1752 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1753 UNSPEC_VSX_XVCVSXDDP))]
1754 "VECTOR_UNIT_VSX_P (V2DFmode)"
1755 "xvcvsxddp %x0,%x1"
1756 [(set_attr "type" "vecdouble")])
1757
1758 (define_expand "vsx_xvcvuxddp_scale"
1759 [(match_operand:V2DF 0 "vsx_register_operand" "")
1760 (match_operand:V2DI 1 "vsx_register_operand" "")
1761 (match_operand:QI 2 "immediate_operand" "")]
1762 "VECTOR_UNIT_VSX_P (V2DFmode)"
1763 {
1764 rtx op0 = operands[0];
1765 rtx op1 = operands[1];
1766 int scale = INTVAL(operands[2]);
1767 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
1768 if (scale != 0)
1769 rs6000_scale_v2df (op0, op0, -scale);
1770 DONE;
1771 })
1772
1773 (define_insn "vsx_xvcvuxddp"
1774 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1775 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
1776 UNSPEC_VSX_XVCVUXDDP))]
1777 "VECTOR_UNIT_VSX_P (V2DFmode)"
1778 "xvcvuxddp %x0,%x1"
1779 [(set_attr "type" "vecdouble")])
1780
1781 (define_expand "vsx_xvcvdpsxds_scale"
1782 [(match_operand:V2DI 0 "vsx_register_operand" "")
1783 (match_operand:V2DF 1 "vsx_register_operand" "")
1784 (match_operand:QI 2 "immediate_operand" "")]
1785 "VECTOR_UNIT_VSX_P (V2DFmode)"
1786 {
1787 rtx op0 = operands[0];
1788 rtx op1 = operands[1];
1789 rtx tmp;
1790 int scale = INTVAL (operands[2]);
1791 if (scale == 0)
1792 tmp = op1;
1793 else
1794 {
1795 tmp = gen_reg_rtx (V2DFmode);
1796 rs6000_scale_v2df (tmp, op1, scale);
1797 }
1798 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
1799 DONE;
1800 })
1801
1802 (define_insn "vsx_xvcvdpsxds"
1803 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1804 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1805 UNSPEC_VSX_XVCVDPSXDS))]
1806 "VECTOR_UNIT_VSX_P (V2DFmode)"
1807 "xvcvdpsxds %x0,%x1"
1808 [(set_attr "type" "vecdouble")])
1809
1810 (define_expand "vsx_xvcvdpuxds_scale"
1811 [(match_operand:V2DI 0 "vsx_register_operand" "")
1812 (match_operand:V2DF 1 "vsx_register_operand" "")
1813 (match_operand:QI 2 "immediate_operand" "")]
1814 "VECTOR_UNIT_VSX_P (V2DFmode)"
1815 {
1816 rtx op0 = operands[0];
1817 rtx op1 = operands[1];
1818 rtx tmp;
1819 int scale = INTVAL (operands[2]);
1820 if (scale == 0)
1821 tmp = op1;
1822 else
1823 {
1824 tmp = gen_reg_rtx (V2DFmode);
1825 rs6000_scale_v2df (tmp, op1, scale);
1826 }
1827 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
1828 DONE;
1829 })
1830
1831 (define_insn "vsx_xvcvdpuxds"
1832 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1833 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
1834 UNSPEC_VSX_XVCVDPUXDS))]
1835 "VECTOR_UNIT_VSX_P (V2DFmode)"
1836 "xvcvdpuxds %x0,%x1"
1837 [(set_attr "type" "vecdouble")])
1838
1839 ;; Convert from 64-bit to 32-bit types
1840 ;; Note, favor the Altivec registers since the usual use of these instructions
1841 ;; is in vector converts and we need to use the Altivec vperm instruction.
1842
1843 (define_insn "vsx_xvcvdpsxws"
1844 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1845 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1846 UNSPEC_VSX_CVDPSXWS))]
1847 "VECTOR_UNIT_VSX_P (V2DFmode)"
1848 "xvcvdpsxws %x0,%x1"
1849 [(set_attr "type" "vecdouble")])
1850
1851 (define_insn "vsx_xvcvdpuxws"
1852 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
1853 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
1854 UNSPEC_VSX_CVDPUXWS))]
1855 "VECTOR_UNIT_VSX_P (V2DFmode)"
1856 "xvcvdpuxws %x0,%x1"
1857 [(set_attr "type" "vecdouble")])
1858
1859 (define_insn "vsx_xvcvsxdsp"
1860 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1861 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1862 UNSPEC_VSX_CVSXDSP))]
1863 "VECTOR_UNIT_VSX_P (V2DFmode)"
1864 "xvcvsxdsp %x0,%x1"
1865 [(set_attr "type" "vecfloat")])
1866
1867 (define_insn "vsx_xvcvuxdsp"
1868 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wd,?wa")
1869 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
1870 UNSPEC_VSX_CVUXDSP))]
1871 "VECTOR_UNIT_VSX_P (V2DFmode)"
1872 "xvcvuxwdp %x0,%x1"
1873 [(set_attr "type" "vecdouble")])
1874
1875 ;; Convert from 32-bit to 64-bit types
1876 (define_insn "vsx_xvcvsxwdp"
1877 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1878 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1879 UNSPEC_VSX_CVSXWDP))]
1880 "VECTOR_UNIT_VSX_P (V2DFmode)"
1881 "xvcvsxwdp %x0,%x1"
1882 [(set_attr "type" "vecdouble")])
1883
1884 (define_insn "vsx_xvcvuxwdp"
1885 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1886 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
1887 UNSPEC_VSX_CVUXWDP))]
1888 "VECTOR_UNIT_VSX_P (V2DFmode)"
1889 "xvcvuxwdp %x0,%x1"
1890 [(set_attr "type" "vecdouble")])
1891
1892 (define_insn "vsx_xvcvspsxds"
1893 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1894 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1895 UNSPEC_VSX_CVSPSXDS))]
1896 "VECTOR_UNIT_VSX_P (V2DFmode)"
1897 "xvcvspsxds %x0,%x1"
1898 [(set_attr "type" "vecdouble")])
1899
1900 (define_insn "vsx_xvcvspuxds"
1901 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
1902 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
1903 UNSPEC_VSX_CVSPUXDS))]
1904 "VECTOR_UNIT_VSX_P (V2DFmode)"
1905 "xvcvspuxds %x0,%x1"
1906 [(set_attr "type" "vecdouble")])
1907
1908 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
1909 ;; since the xvrdpiz instruction does not truncate the value if the floating
1910 ;; point value is < LONG_MIN or > LONG_MAX.
1911 (define_insn "*vsx_float_fix_v2df2"
1912 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1913 (float:V2DF
1914 (fix:V2DI
1915 (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
1916 "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
1917 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
1918 && !flag_trapping_math && TARGET_FRIZ"
1919 "xvrdpiz %x0,%x1"
1920 [(set_attr "type" "vecdouble")
1921 (set_attr "fp_type" "fp_addsub_d")])
1922
1923 \f
1924 ;; Permute operations
1925
1926 ;; Build a V2DF/V2DI vector from two scalars
1927 (define_insn "vsx_concat_<mode>"
1928 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSr>,?<VSa>")
1929 (vec_concat:VSX_D
1930 (match_operand:<VS_scalar> 1 "vsx_register_operand" "<VS_64reg>,<VSa>")
1931 (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")))]
1932 "VECTOR_MEM_VSX_P (<MODE>mode)"
1933 {
1934 if (BYTES_BIG_ENDIAN)
1935 return "xxpermdi %x0,%x1,%x2,0";
1936 else
1937 return "xxpermdi %x0,%x2,%x1,0";
1938 }
1939 [(set_attr "type" "vecperm")])
1940
1941 ;; Special purpose concat using xxpermdi to glue two single precision values
1942 ;; together, relying on the fact that internally scalar floats are represented
1943 ;; as doubles. This is used to initialize a V4SF vector with 4 floats
1944 (define_insn "vsx_concat_v2sf"
1945 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
1946 (unspec:V2DF
1947 [(match_operand:SF 1 "vsx_register_operand" "f,f")
1948 (match_operand:SF 2 "vsx_register_operand" "f,f")]
1949 UNSPEC_VSX_CONCAT))]
1950 "VECTOR_MEM_VSX_P (V2DFmode)"
1951 {
1952 if (BYTES_BIG_ENDIAN)
1953 return "xxpermdi %x0,%x1,%x2,0";
1954 else
1955 return "xxpermdi %x0,%x2,%x1,0";
1956 }
1957 [(set_attr "type" "vecperm")])
1958
1959 ;; xxpermdi for little endian loads and stores. We need several of
1960 ;; these since the form of the PARALLEL differs by mode.
1961 (define_insn "*vsx_xxpermdi2_le_<mode>"
1962 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
1963 (vec_select:VSX_LE
1964 (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
1965 (parallel [(const_int 1) (const_int 0)])))]
1966 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1967 "xxpermdi %x0,%x1,%x1,2"
1968 [(set_attr "type" "vecperm")])
1969
1970 (define_insn "*vsx_xxpermdi4_le_<mode>"
1971 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
1972 (vec_select:VSX_W
1973 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
1974 (parallel [(const_int 2) (const_int 3)
1975 (const_int 0) (const_int 1)])))]
1976 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
1977 "xxpermdi %x0,%x1,%x1,2"
1978 [(set_attr "type" "vecperm")])
1979
1980 (define_insn "*vsx_xxpermdi8_le_V8HI"
1981 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1982 (vec_select:V8HI
1983 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1984 (parallel [(const_int 4) (const_int 5)
1985 (const_int 6) (const_int 7)
1986 (const_int 0) (const_int 1)
1987 (const_int 2) (const_int 3)])))]
1988 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
1989 "xxpermdi %x0,%x1,%x1,2"
1990 [(set_attr "type" "vecperm")])
1991
1992 (define_insn "*vsx_xxpermdi16_le_V16QI"
1993 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1994 (vec_select:V16QI
1995 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1996 (parallel [(const_int 8) (const_int 9)
1997 (const_int 10) (const_int 11)
1998 (const_int 12) (const_int 13)
1999 (const_int 14) (const_int 15)
2000 (const_int 0) (const_int 1)
2001 (const_int 2) (const_int 3)
2002 (const_int 4) (const_int 5)
2003 (const_int 6) (const_int 7)])))]
2004 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
2005 "xxpermdi %x0,%x1,%x1,2"
2006 [(set_attr "type" "vecperm")])
2007
2008 ;; lxvd2x for little endian loads. We need several of
2009 ;; these since the form of the PARALLEL differs by mode.
2010 (define_insn "*vsx_lxvd2x2_le_<mode>"
2011 [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=<VSa>")
2012 (vec_select:VSX_LE
2013 (match_operand:VSX_LE 1 "memory_operand" "Z")
2014 (parallel [(const_int 1) (const_int 0)])))]
2015 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2016 "lxvd2x %x0,%y1"
2017 [(set_attr "type" "vecload")])
2018
2019 (define_insn "*vsx_lxvd2x4_le_<mode>"
2020 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
2021 (vec_select:VSX_W
2022 (match_operand:VSX_W 1 "memory_operand" "Z")
2023 (parallel [(const_int 2) (const_int 3)
2024 (const_int 0) (const_int 1)])))]
2025 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2026 "lxvd2x %x0,%y1"
2027 [(set_attr "type" "vecload")])
2028
2029 (define_insn "*vsx_lxvd2x8_le_V8HI"
2030 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
2031 (vec_select:V8HI
2032 (match_operand:V8HI 1 "memory_operand" "Z")
2033 (parallel [(const_int 4) (const_int 5)
2034 (const_int 6) (const_int 7)
2035 (const_int 0) (const_int 1)
2036 (const_int 2) (const_int 3)])))]
2037 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2038 "lxvd2x %x0,%y1"
2039 [(set_attr "type" "vecload")])
2040
2041 (define_insn "*vsx_lxvd2x16_le_V16QI"
2042 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
2043 (vec_select:V16QI
2044 (match_operand:V16QI 1 "memory_operand" "Z")
2045 (parallel [(const_int 8) (const_int 9)
2046 (const_int 10) (const_int 11)
2047 (const_int 12) (const_int 13)
2048 (const_int 14) (const_int 15)
2049 (const_int 0) (const_int 1)
2050 (const_int 2) (const_int 3)
2051 (const_int 4) (const_int 5)
2052 (const_int 6) (const_int 7)])))]
2053 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2054 "lxvd2x %x0,%y1"
2055 [(set_attr "type" "vecload")])
2056
2057 ;; stxvd2x for little endian stores. We need several of
2058 ;; these since the form of the PARALLEL differs by mode.
2059 (define_insn "*vsx_stxvd2x2_le_<mode>"
2060 [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
2061 (vec_select:VSX_LE
2062 (match_operand:VSX_LE 1 "vsx_register_operand" "<VSa>")
2063 (parallel [(const_int 1) (const_int 0)])))]
2064 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2065 "stxvd2x %x1,%y0"
2066 [(set_attr "type" "vecstore")])
2067
2068 (define_insn "*vsx_stxvd2x4_le_<mode>"
2069 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
2070 (vec_select:VSX_W
2071 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
2072 (parallel [(const_int 2) (const_int 3)
2073 (const_int 0) (const_int 1)])))]
2074 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
2075 "stxvd2x %x1,%y0"
2076 [(set_attr "type" "vecstore")])
2077
2078 (define_insn "*vsx_stxvd2x8_le_V8HI"
2079 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
2080 (vec_select:V8HI
2081 (match_operand:V8HI 1 "vsx_register_operand" "wa")
2082 (parallel [(const_int 4) (const_int 5)
2083 (const_int 6) (const_int 7)
2084 (const_int 0) (const_int 1)
2085 (const_int 2) (const_int 3)])))]
2086 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
2087 "stxvd2x %x1,%y0"
2088 [(set_attr "type" "vecstore")])
2089
2090 (define_insn "*vsx_stxvd2x16_le_V16QI"
2091 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
2092 (vec_select:V16QI
2093 (match_operand:V16QI 1 "vsx_register_operand" "wa")
2094 (parallel [(const_int 8) (const_int 9)
2095 (const_int 10) (const_int 11)
2096 (const_int 12) (const_int 13)
2097 (const_int 14) (const_int 15)
2098 (const_int 0) (const_int 1)
2099 (const_int 2) (const_int 3)
2100 (const_int 4) (const_int 5)
2101 (const_int 6) (const_int 7)])))]
2102 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
2103 "stxvd2x %x1,%y0"
2104 [(set_attr "type" "vecstore")])
2105
2106 ;; Convert a TImode value into V1TImode
2107 (define_expand "vsx_set_v1ti"
2108 [(match_operand:V1TI 0 "nonimmediate_operand" "")
2109 (match_operand:V1TI 1 "nonimmediate_operand" "")
2110 (match_operand:TI 2 "input_operand" "")
2111 (match_operand:QI 3 "u5bit_cint_operand" "")]
2112 "VECTOR_MEM_VSX_P (V1TImode)"
2113 {
2114 if (operands[3] != const0_rtx)
2115 gcc_unreachable ();
2116
2117 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
2118 DONE;
2119 })
2120
2121 ;; Set the element of a V2DI/VD2F mode
2122 (define_insn "vsx_set_<mode>"
2123 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
2124 (unspec:VSX_D
2125 [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
2126 (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
2127 (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
2128 UNSPEC_VSX_SET))]
2129 "VECTOR_MEM_VSX_P (<MODE>mode)"
2130 {
2131 int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
2132 if (INTVAL (operands[3]) == idx_first)
2133 return \"xxpermdi %x0,%x2,%x1,1\";
2134 else if (INTVAL (operands[3]) == 1 - idx_first)
2135 return \"xxpermdi %x0,%x1,%x2,0\";
2136 else
2137 gcc_unreachable ();
2138 }
2139 [(set_attr "type" "vecperm")])
2140
2141 ;; Extract a DF/DI element from V2DF/V2DI
2142 ;; Optimize cases were we can do a simple or direct move.
2143 ;; Or see if we can avoid doing the move at all
2144
2145 ;; There are some unresolved problems with reload that show up if an Altivec
2146 ;; register was picked. Limit the scalar value to FPRs for now.
2147
2148 (define_insn "vsx_extract_<mode>"
2149 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
2150
2151 (vec_select:<VS_scalar>
2152 (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
2153
2154 (parallel
2155 [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
2156 "VECTOR_MEM_VSX_P (<MODE>mode)"
2157 {
2158 int element = INTVAL (operands[2]);
2159 int op0_regno = REGNO (operands[0]);
2160 int op1_regno = REGNO (operands[1]);
2161 int fldDM;
2162
2163 gcc_assert (IN_RANGE (element, 0, 1));
2164 gcc_assert (VSX_REGNO_P (op1_regno));
2165
2166 if (element == VECTOR_ELEMENT_SCALAR_64BIT)
2167 {
2168 if (op0_regno == op1_regno)
2169 return ASM_COMMENT_START " vec_extract to same register";
2170
2171 else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
2172 && TARGET_POWERPC64)
2173 return "mfvsrd %0,%x1";
2174
2175 else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
2176 return "fmr %0,%1";
2177
2178 else if (VSX_REGNO_P (op0_regno))
2179 return "xxlor %x0,%x1,%x1";
2180
2181 else
2182 gcc_unreachable ();
2183 }
2184
2185 else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
2186 && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
2187 return "mfvsrld %0,%x1";
2188
2189 else if (VSX_REGNO_P (op0_regno))
2190 {
2191 fldDM = element << 1;
2192 if (!BYTES_BIG_ENDIAN)
2193 fldDM = 3 - fldDM;
2194 operands[3] = GEN_INT (fldDM);
2195 return "xxpermdi %x0,%x1,%x1,%3";
2196 }
2197
2198 else
2199 gcc_unreachable ();
2200 }
2201 [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
2202
2203 ;; Optimize extracting a single scalar element from memory.
2204 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
2205 [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
2206 (vec_select:<VSX_D:VS_scalar>
2207 (match_operand:VSX_D 1 "memory_operand" "m,m")
2208 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
2209 (clobber (match_scratch:P 3 "=&b,&b"))]
2210 "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
2211 "#"
2212 "&& reload_completed"
2213 [(set (match_dup 0) (match_dup 4))]
2214 {
2215 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2216 operands[3], <VSX_D:VS_scalar>mode);
2217 }
2218 [(set_attr "type" "fpload,load")
2219 (set_attr "length" "8")])
2220
2221 ;; Optimize storing a single scalar element that is the right location to
2222 ;; memory
2223 (define_insn "*vsx_extract_<mode>_store"
2224 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,o")
2225 (vec_select:<VS_scalar>
2226 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
2227 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
2228 "VECTOR_MEM_VSX_P (<MODE>mode)"
2229 "@
2230 stfd%U0%X0 %1,%0
2231 stxsd%U0x %x1,%y0
2232 stxsd %1,%0"
2233 [(set_attr "type" "fpstore")
2234 (set_attr "length" "4")])
2235
2236 ;; Variable V2DI/V2DF extract shift
2237 (define_insn "vsx_vslo_<mode>"
2238 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
2239 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
2240 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
2241 UNSPEC_VSX_VSLO))]
2242 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2243 "vslo %0,%1,%2"
2244 [(set_attr "type" "vecperm")])
2245
2246 ;; Variable V2DI/V2DF extract
2247 (define_insn_and_split "vsx_extract_<mode>_var"
2248 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
2249 (unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
2250 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2251 UNSPEC_VSX_EXTRACT))
2252 (clobber (match_scratch:DI 3 "=r,&b,&b"))
2253 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2254 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2255 "#"
2256 "&& reload_completed"
2257 [(const_int 0)]
2258 {
2259 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2260 operands[3], operands[4]);
2261 DONE;
2262 })
2263
2264 ;; Extract a SF element from V4SF
2265 (define_insn_and_split "vsx_extract_v4sf"
2266 [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
2267 (vec_select:SF
2268 (match_operand:V4SF 1 "vsx_register_operand" "wa")
2269 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
2270 (clobber (match_scratch:V4SF 3 "=0"))]
2271 "VECTOR_UNIT_VSX_P (V4SFmode)"
2272 "#"
2273 "&& 1"
2274 [(const_int 0)]
2275 {
2276 rtx op0 = operands[0];
2277 rtx op1 = operands[1];
2278 rtx op2 = operands[2];
2279 rtx op3 = operands[3];
2280 rtx tmp;
2281 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
2282
2283 if (ele == 0)
2284 tmp = op1;
2285 else
2286 {
2287 if (GET_CODE (op3) == SCRATCH)
2288 op3 = gen_reg_rtx (V4SFmode);
2289 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
2290 tmp = op3;
2291 }
2292 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
2293 DONE;
2294 }
2295 [(set_attr "length" "8")
2296 (set_attr "type" "fp")])
2297
2298 (define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
2299 [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
2300 (vec_select:SF
2301 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
2302 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
2303 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
2304 "VECTOR_MEM_VSX_P (V4SFmode)"
2305 "#"
2306 "&& reload_completed"
2307 [(set (match_dup 0) (match_dup 4))]
2308 {
2309 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2310 operands[3], SFmode);
2311 }
2312 [(set_attr "type" "fpload,fpload,fpload,load")
2313 (set_attr "length" "8")])
2314
2315 ;; Variable V4SF extract
2316 (define_insn_and_split "vsx_extract_v4sf_var"
2317 [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
2318 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
2319 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
2320 UNSPEC_VSX_EXTRACT))
2321 (clobber (match_scratch:DI 3 "=r,&b,&b"))
2322 (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
2323 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT
2324 && TARGET_UPPER_REGS_SF"
2325 "#"
2326 "&& reload_completed"
2327 [(const_int 0)]
2328 {
2329 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2330 operands[3], operands[4]);
2331 DONE;
2332 })
2333
2334 ;; Expand the builtin form of xxpermdi to canonical rtl.
2335 (define_expand "vsx_xxpermdi_<mode>"
2336 [(match_operand:VSX_L 0 "vsx_register_operand" "")
2337 (match_operand:VSX_L 1 "vsx_register_operand" "")
2338 (match_operand:VSX_L 2 "vsx_register_operand" "")
2339 (match_operand:QI 3 "u5bit_cint_operand" "")]
2340 "VECTOR_MEM_VSX_P (<MODE>mode)"
2341 {
2342 rtx target = operands[0];
2343 rtx op0 = operands[1];
2344 rtx op1 = operands[2];
2345 int mask = INTVAL (operands[3]);
2346 rtx perm0 = GEN_INT ((mask >> 1) & 1);
2347 rtx perm1 = GEN_INT ((mask & 1) + 2);
2348 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
2349
2350 if (<MODE>mode == V2DFmode)
2351 gen = gen_vsx_xxpermdi2_v2df_1;
2352 else
2353 {
2354 gen = gen_vsx_xxpermdi2_v2di_1;
2355 if (<MODE>mode != V2DImode)
2356 {
2357 target = gen_lowpart (V2DImode, target);
2358 op0 = gen_lowpart (V2DImode, op0);
2359 op1 = gen_lowpart (V2DImode, op1);
2360 }
2361 }
2362 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
2363 transformation we don't want; it is necessary for
2364 rs6000_expand_vec_perm_const_1 but not for this use. So we
2365 prepare for that by reversing the transformation here. */
2366 if (BYTES_BIG_ENDIAN)
2367 emit_insn (gen (target, op0, op1, perm0, perm1));
2368 else
2369 {
2370 rtx p0 = GEN_INT (3 - INTVAL (perm1));
2371 rtx p1 = GEN_INT (3 - INTVAL (perm0));
2372 emit_insn (gen (target, op1, op0, p0, p1));
2373 }
2374 DONE;
2375 })
2376
2377 (define_insn "vsx_xxpermdi2_<mode>_1"
2378 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
2379 (vec_select:VSX_D
2380 (vec_concat:<VS_double>
2381 (match_operand:VSX_D 1 "vsx_register_operand" "wd")
2382 (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
2383 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2384 (match_operand 4 "const_2_to_3_operand" "")])))]
2385 "VECTOR_MEM_VSX_P (<MODE>mode)"
2386 {
2387 int op3, op4, mask;
2388
2389 /* For little endian, swap operands and invert/swap selectors
2390 to get the correct xxpermdi. The operand swap sets up the
2391 inputs as a little endian array. The selectors are swapped
2392 because they are defined to use big endian ordering. The
2393 selectors are inverted to get the correct doublewords for
2394 little endian ordering. */
2395 if (BYTES_BIG_ENDIAN)
2396 {
2397 op3 = INTVAL (operands[3]);
2398 op4 = INTVAL (operands[4]);
2399 }
2400 else
2401 {
2402 op3 = 3 - INTVAL (operands[4]);
2403 op4 = 3 - INTVAL (operands[3]);
2404 }
2405
2406 mask = (op3 << 1) | (op4 - 2);
2407 operands[3] = GEN_INT (mask);
2408
2409 if (BYTES_BIG_ENDIAN)
2410 return "xxpermdi %x0,%x1,%x2,%3";
2411 else
2412 return "xxpermdi %x0,%x2,%x1,%3";
2413 }
2414 [(set_attr "type" "vecperm")])
2415
2416 (define_expand "vec_perm_const<mode>"
2417 [(match_operand:VSX_D 0 "vsx_register_operand" "")
2418 (match_operand:VSX_D 1 "vsx_register_operand" "")
2419 (match_operand:VSX_D 2 "vsx_register_operand" "")
2420 (match_operand:V2DI 3 "" "")]
2421 "VECTOR_MEM_VSX_P (<MODE>mode)"
2422 {
2423 if (rs6000_expand_vec_perm_const (operands))
2424 DONE;
2425 else
2426 FAIL;
2427 })
2428
2429 ;; Extraction of a single element in a small integer vector. None of the small
2430 ;; types are currently allowed in a vector register, so we extract to a DImode
2431 ;; and either do a direct move or store.
2432 (define_expand "vsx_extract_<mode>"
2433 [(parallel [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand")
2434 (vec_select:<VS_scalar>
2435 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
2436 (parallel [(match_operand:QI 2 "const_int_operand")])))
2437 (clobber (match_dup 3))])]
2438 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2439 {
2440 operands[3] = gen_rtx_SCRATCH ((TARGET_VEXTRACTUB) ? DImode : <MODE>mode);
2441 })
2442
2443 ;; Under ISA 3.0, we can use the byte/half-word/word integer stores if we are
2444 ;; extracting a vector element and storing it to memory, rather than using
2445 ;; direct move to a GPR and a GPR store.
2446 (define_insn_and_split "*vsx_extract_<mode>_p9"
2447 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r,Z")
2448 (vec_select:<VS_scalar>
2449 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,<VSX_EX>")
2450 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
2451 (clobber (match_scratch:DI 3 "=<VSX_EX>,<VSX_EX>"))]
2452 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
2453 "#"
2454 "&& (reload_completed || MEM_P (operands[0]))"
2455 [(const_int 0)]
2456 {
2457 rtx dest = operands[0];
2458 rtx src = operands[1];
2459 rtx element = operands[2];
2460 rtx di_tmp = operands[3];
2461
2462 if (GET_CODE (di_tmp) == SCRATCH)
2463 di_tmp = gen_reg_rtx (DImode);
2464
2465 emit_insn (gen_vsx_extract_<mode>_di (di_tmp, src, element));
2466
2467 if (REG_P (dest))
2468 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), di_tmp);
2469 else if (SUBREG_P (dest))
2470 emit_move_insn (gen_rtx_REG (DImode, subreg_regno (dest)), di_tmp);
2471 else if (MEM_P (operands[0]))
2472 {
2473 if (can_create_pseudo_p ())
2474 dest = rs6000_address_for_fpconvert (dest);
2475
2476 if (<MODE>mode == V16QImode)
2477 emit_insn (gen_p9_stxsibx (dest, di_tmp));
2478 else if (<MODE>mode == V8HImode)
2479 emit_insn (gen_p9_stxsihx (dest, di_tmp));
2480 else if (<MODE>mode == V4SImode)
2481 emit_insn (gen_stfiwx (dest, di_tmp));
2482 else
2483 gcc_unreachable ();
2484 }
2485 else
2486 gcc_unreachable ();
2487
2488 DONE;
2489 }
2490 [(set_attr "type" "vecsimple,fpstore")])
2491
2492 (define_insn "vsx_extract_<mode>_di"
2493 [(set (match_operand:DI 0 "gpc_reg_operand" "=<VSX_EX>")
2494 (zero_extend:DI
2495 (vec_select:<VS_scalar>
2496 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>")
2497 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))))]
2498 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
2499 {
2500 int element = INTVAL (operands[2]);
2501 int unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
2502 int offset = ((VECTOR_ELT_ORDER_BIG)
2503 ? unit_size * element
2504 : unit_size * (GET_MODE_NUNITS (<MODE>mode) - 1 - element));
2505
2506 operands[2] = GEN_INT (offset);
2507 if (unit_size == 4)
2508 return "xxextractuw %x0,%x1,%2";
2509 else
2510 return "vextractu<wd> %0,%1,%2";
2511 }
2512 [(set_attr "type" "vecsimple")])
2513
2514 (define_insn_and_split "*vsx_extract_<mode>_p8"
2515 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
2516 (vec_select:<VS_scalar>
2517 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
2518 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
2519 (clobber (match_scratch:VSX_EXTRACT_I 3 "=v"))]
2520 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2521 "#"
2522 "&& reload_completed"
2523 [(const_int 0)]
2524 {
2525 rtx dest = operands[0];
2526 rtx src = operands[1];
2527 rtx element = operands[2];
2528 rtx vec_tmp = operands[3];
2529 int value;
2530
2531 if (!VECTOR_ELT_ORDER_BIG)
2532 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
2533
2534 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
2535 instruction. */
2536 value = INTVAL (element);
2537 if (<MODE>mode == V16QImode)
2538 {
2539 if (value != 7)
2540 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
2541 else
2542 vec_tmp = src;
2543 }
2544 else if (<MODE>mode == V8HImode)
2545 {
2546 if (value != 3)
2547 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
2548 else
2549 vec_tmp = src;
2550 }
2551 else if (<MODE>mode == V4SImode)
2552 {
2553 if (value != 1)
2554 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
2555 else
2556 vec_tmp = src;
2557 }
2558 else
2559 gcc_unreachable ();
2560
2561 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
2562 gen_rtx_REG (DImode, REGNO (vec_tmp)));
2563 DONE;
2564 }
2565 [(set_attr "type" "mftgpr")])
2566
2567 ;; Optimize extracting a single scalar element from memory.
2568 (define_insn_and_split "*vsx_extract_<mode>_load"
2569 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
2570 (vec_select:<VS_scalar>
2571 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
2572 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
2573 (clobber (match_scratch:DI 3 "=&b"))]
2574 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2575 "#"
2576 "&& reload_completed"
2577 [(set (match_dup 0) (match_dup 4))]
2578 {
2579 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
2580 operands[3], <VS_scalar>mode);
2581 }
2582 [(set_attr "type" "load")
2583 (set_attr "length" "8")])
2584
2585 ;; Variable V16QI/V8HI/V4SI extract
2586 (define_insn_and_split "vsx_extract_<mode>_var"
2587 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
2588 (unspec:<VS_scalar>
2589 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "v,m")
2590 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
2591 UNSPEC_VSX_EXTRACT))
2592 (clobber (match_scratch:DI 3 "=r,&b"))
2593 (clobber (match_scratch:V2DI 4 "=&v,X"))]
2594 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
2595 "#"
2596 "&& reload_completed"
2597 [(const_int 0)]
2598 {
2599 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
2600 operands[3], operands[4]);
2601 DONE;
2602 })
2603
2604 ;; Expanders for builtins
2605 (define_expand "vsx_mergel_<mode>"
2606 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2607 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2608 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2609 "VECTOR_MEM_VSX_P (<MODE>mode)"
2610 {
2611 rtvec v;
2612 rtx x;
2613
2614 /* Special handling for LE with -maltivec=be. */
2615 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2616 {
2617 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2618 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2619 }
2620 else
2621 {
2622 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2623 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2624 }
2625
2626 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2627 emit_insn (gen_rtx_SET (operands[0], x));
2628 DONE;
2629 })
2630
2631 (define_expand "vsx_mergeh_<mode>"
2632 [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
2633 (use (match_operand:VSX_D 1 "vsx_register_operand" ""))
2634 (use (match_operand:VSX_D 2 "vsx_register_operand" ""))]
2635 "VECTOR_MEM_VSX_P (<MODE>mode)"
2636 {
2637 rtvec v;
2638 rtx x;
2639
2640 /* Special handling for LE with -maltivec=be. */
2641 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
2642 {
2643 v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
2644 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
2645 }
2646 else
2647 {
2648 v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
2649 x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
2650 }
2651
2652 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
2653 emit_insn (gen_rtx_SET (operands[0], x));
2654 DONE;
2655 })
2656
2657 ;; V2DF/V2DI splat
2658 (define_insn "vsx_splat_<mode>"
2659 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>,<VSa>,we")
2660 (vec_duplicate:VSX_D
2661 (match_operand:<VS_scalar> 1 "splat_input_operand" "<VS_64reg>,Z,b")))]
2662 "VECTOR_MEM_VSX_P (<MODE>mode)"
2663 "@
2664 xxpermdi %x0,%x1,%x1,0
2665 lxvdsx %x0,%y1
2666 mtvsrdd %x0,%1,%1"
2667 [(set_attr "type" "vecperm,vecload,mftgpr")])
2668
2669 ;; V4SI splat (ISA 3.0)
2670 ;; When SI's are allowed in VSX registers, add XXSPLTW support
2671 (define_expand "vsx_splat_<mode>"
2672 [(set (match_operand:VSX_W 0 "vsx_register_operand" "")
2673 (vec_duplicate:VSX_W
2674 (match_operand:<VS_scalar> 1 "splat_input_operand" "")))]
2675 "TARGET_P9_VECTOR"
2676 {
2677 if (MEM_P (operands[1]))
2678 operands[1] = rs6000_address_for_fpconvert (operands[1]);
2679 else if (!REG_P (operands[1]))
2680 operands[1] = force_reg (<VS_scalar>mode, operands[1]);
2681 })
2682
2683 (define_insn "*vsx_splat_v4si_internal"
2684 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
2685 (vec_duplicate:V4SI
2686 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
2687 "TARGET_P9_VECTOR"
2688 "@
2689 mtvsrws %x0,%1
2690 lxvwsx %x0,%y1"
2691 [(set_attr "type" "mftgpr,vecload")])
2692
2693 ;; V4SF splat (ISA 3.0)
2694 (define_insn_and_split "*vsx_splat_v4sf_internal"
2695 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
2696 (vec_duplicate:V4SF
2697 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
2698 "TARGET_P9_VECTOR"
2699 "@
2700 lxvwsx %x0,%y1
2701 #
2702 mtvsrws %x0,%1"
2703 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
2704 [(set (match_dup 0)
2705 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
2706 (set (match_dup 0)
2707 (unspec:V4SF [(match_dup 0)
2708 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
2709 ""
2710 [(set_attr "type" "vecload,vecperm,mftgpr")
2711 (set_attr "length" "4,8,4")])
2712
2713 ;; V4SF/V4SI splat from a vector element
2714 (define_insn "vsx_xxspltw_<mode>"
2715 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2716 (vec_duplicate:VSX_W
2717 (vec_select:<VS_scalar>
2718 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2719 (parallel
2720 [(match_operand:QI 2 "u5bit_cint_operand" "i,i")]))))]
2721 "VECTOR_MEM_VSX_P (<MODE>mode)"
2722 {
2723 if (!BYTES_BIG_ENDIAN)
2724 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
2725
2726 return "xxspltw %x0,%x1,%2";
2727 }
2728 [(set_attr "type" "vecperm")])
2729
2730 (define_insn "vsx_xxspltw_<mode>_direct"
2731 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2732 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2733 (match_operand:QI 2 "u5bit_cint_operand" "i,i")]
2734 UNSPEC_VSX_XXSPLTW))]
2735 "VECTOR_MEM_VSX_P (<MODE>mode)"
2736 "xxspltw %x0,%x1,%2"
2737 [(set_attr "type" "vecperm")])
2738
2739 ;; V2DF/V2DI splat for use by vec_splat builtin
2740 (define_insn "vsx_xxspltd_<mode>"
2741 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
2742 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
2743 (match_operand:QI 2 "u5bit_cint_operand" "i")]
2744 UNSPEC_VSX_XXSPLTD))]
2745 "VECTOR_MEM_VSX_P (<MODE>mode)"
2746 {
2747 if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
2748 || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
2749 return "xxpermdi %x0,%x1,%x1,0";
2750 else
2751 return "xxpermdi %x0,%x1,%x1,3";
2752 }
2753 [(set_attr "type" "vecperm")])
2754
2755 ;; V4SF/V4SI interleave
2756 (define_insn "vsx_xxmrghw_<mode>"
2757 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2758 (vec_select:VSX_W
2759 (vec_concat:<VS_double>
2760 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2761 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
2762 (parallel [(const_int 0) (const_int 4)
2763 (const_int 1) (const_int 5)])))]
2764 "VECTOR_MEM_VSX_P (<MODE>mode)"
2765 {
2766 if (BYTES_BIG_ENDIAN)
2767 return "xxmrghw %x0,%x1,%x2";
2768 else
2769 return "xxmrglw %x0,%x2,%x1";
2770 }
2771 [(set_attr "type" "vecperm")])
2772
2773 (define_insn "vsx_xxmrglw_<mode>"
2774 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
2775 (vec_select:VSX_W
2776 (vec_concat:<VS_double>
2777 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
2778 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
2779 (parallel [(const_int 2) (const_int 6)
2780 (const_int 3) (const_int 7)])))]
2781 "VECTOR_MEM_VSX_P (<MODE>mode)"
2782 {
2783 if (BYTES_BIG_ENDIAN)
2784 return "xxmrglw %x0,%x1,%x2";
2785 else
2786 return "xxmrghw %x0,%x2,%x1";
2787 }
2788 [(set_attr "type" "vecperm")])
2789
2790 ;; Shift left double by word immediate
2791 (define_insn "vsx_xxsldwi_<mode>"
2792 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
2793 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
2794 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
2795 (match_operand:QI 3 "u5bit_cint_operand" "i")]
2796 UNSPEC_VSX_SLDWI))]
2797 "VECTOR_MEM_VSX_P (<MODE>mode)"
2798 "xxsldwi %x0,%x1,%x2,%3"
2799 [(set_attr "type" "vecperm")])
2800
2801 \f
2802 ;; Vector reduction insns and splitters
2803
2804 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
2805 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
2806 (VEC_reduc:V2DF
2807 (vec_concat:V2DF
2808 (vec_select:DF
2809 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2810 (parallel [(const_int 1)]))
2811 (vec_select:DF
2812 (match_dup 1)
2813 (parallel [(const_int 0)])))
2814 (match_dup 1)))
2815 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
2816 "VECTOR_UNIT_VSX_P (V2DFmode)"
2817 "#"
2818 ""
2819 [(const_int 0)]
2820 "
2821 {
2822 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
2823 ? gen_reg_rtx (V2DFmode)
2824 : operands[2];
2825 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
2826 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
2827 DONE;
2828 }"
2829 [(set_attr "length" "8")
2830 (set_attr "type" "veccomplex")])
2831
2832 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
2833 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
2834 (VEC_reduc:V4SF
2835 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2836 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
2837 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2838 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
2839 "VECTOR_UNIT_VSX_P (V4SFmode)"
2840 "#"
2841 ""
2842 [(const_int 0)]
2843 "
2844 {
2845 rtx op0 = operands[0];
2846 rtx op1 = operands[1];
2847 rtx tmp2, tmp3, tmp4;
2848
2849 if (can_create_pseudo_p ())
2850 {
2851 tmp2 = gen_reg_rtx (V4SFmode);
2852 tmp3 = gen_reg_rtx (V4SFmode);
2853 tmp4 = gen_reg_rtx (V4SFmode);
2854 }
2855 else
2856 {
2857 tmp2 = operands[2];
2858 tmp3 = operands[3];
2859 tmp4 = tmp2;
2860 }
2861
2862 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2863 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2864 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2865 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
2866 DONE;
2867 }"
2868 [(set_attr "length" "16")
2869 (set_attr "type" "veccomplex")])
2870
2871 ;; Combiner patterns with the vector reduction patterns that knows we can get
2872 ;; to the top element of the V2DF array without doing an extract.
2873
2874 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
2875 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
2876 (vec_select:DF
2877 (VEC_reduc:V2DF
2878 (vec_concat:V2DF
2879 (vec_select:DF
2880 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
2881 (parallel [(const_int 1)]))
2882 (vec_select:DF
2883 (match_dup 1)
2884 (parallel [(const_int 0)])))
2885 (match_dup 1))
2886 (parallel [(const_int 1)])))
2887 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
2888 "VECTOR_UNIT_VSX_P (V2DFmode)"
2889 "#"
2890 ""
2891 [(const_int 0)]
2892 "
2893 {
2894 rtx hi = gen_highpart (DFmode, operands[1]);
2895 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
2896 ? gen_reg_rtx (DFmode)
2897 : operands[2];
2898
2899 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
2900 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
2901 DONE;
2902 }"
2903 [(set_attr "length" "8")
2904 (set_attr "type" "veccomplex")])
2905
2906 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
2907 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
2908 (vec_select:SF
2909 (VEC_reduc:V4SF
2910 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
2911 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
2912 (parallel [(const_int 3)])))
2913 (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
2914 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
2915 (clobber (match_scratch:V4SF 4 "=0,0"))]
2916 "VECTOR_UNIT_VSX_P (V4SFmode)"
2917 "#"
2918 ""
2919 [(const_int 0)]
2920 "
2921 {
2922 rtx op0 = operands[0];
2923 rtx op1 = operands[1];
2924 rtx tmp2, tmp3, tmp4, tmp5;
2925
2926 if (can_create_pseudo_p ())
2927 {
2928 tmp2 = gen_reg_rtx (V4SFmode);
2929 tmp3 = gen_reg_rtx (V4SFmode);
2930 tmp4 = gen_reg_rtx (V4SFmode);
2931 tmp5 = gen_reg_rtx (V4SFmode);
2932 }
2933 else
2934 {
2935 tmp2 = operands[2];
2936 tmp3 = operands[3];
2937 tmp4 = tmp2;
2938 tmp5 = operands[4];
2939 }
2940
2941 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
2942 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
2943 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
2944 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
2945 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
2946 DONE;
2947 }"
2948 [(set_attr "length" "20")
2949 (set_attr "type" "veccomplex")])
2950
2951 \f
2952 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
2953 (define_peephole
2954 [(set (match_operand:P 0 "base_reg_operand" "")
2955 (match_operand:P 1 "short_cint_operand" ""))
2956 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
2957 (mem:VSX_M (plus:P (match_dup 0)
2958 (match_operand:P 3 "int_reg_operand" ""))))]
2959 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2960 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
2961 [(set_attr "length" "8")
2962 (set_attr "type" "vecload")])
2963
2964 (define_peephole
2965 [(set (match_operand:P 0 "base_reg_operand" "")
2966 (match_operand:P 1 "short_cint_operand" ""))
2967 (set (match_operand:VSX_M 2 "vsx_register_operand" "")
2968 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
2969 (match_dup 0))))]
2970 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
2971 "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
2972 [(set_attr "length" "8")
2973 (set_attr "type" "vecload")])
2974
2975 \f
2976 ;; ISA 3.0 vector extend sign support
2977
2978 (define_insn "vsx_sign_extend_qi_<mode>"
2979 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
2980 (unspec:VSINT_84
2981 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2982 UNSPEC_VSX_SIGN_EXTEND))]
2983 "TARGET_P9_VECTOR"
2984 "vextsb2<wd> %0,%1"
2985 [(set_attr "type" "vecexts")])
2986
2987 (define_insn "vsx_sign_extend_hi_<mode>"
2988 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
2989 (unspec:VSINT_84
2990 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
2991 UNSPEC_VSX_SIGN_EXTEND))]
2992 "TARGET_P9_VECTOR"
2993 "vextsh2<wd> %0,%1"
2994 [(set_attr "type" "vecexts")])
2995
2996 (define_insn "*vsx_sign_extend_si_v2di"
2997 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
2998 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
2999 UNSPEC_VSX_SIGN_EXTEND))]
3000 "TARGET_P9_VECTOR"
3001 "vextsw2d %0,%1"
3002 [(set_attr "type" "vecexts")])
3003
3004 \f
3005 ;; ISA 3.0 memory operations
3006 (define_insn "p9_lxsi<wd>zx"
3007 [(set (match_operand:DI 0 "vsx_register_operand" "=wi")
3008 (unspec:DI [(zero_extend:DI
3009 (match_operand:QHI 1 "indexed_or_indirect_operand" "Z"))]
3010 UNSPEC_P9_MEMORY))]
3011 "TARGET_P9_VECTOR"
3012 "lxsi<wd>zx %x0,%y1"
3013 [(set_attr "type" "fpload")])
3014
3015 (define_insn "p9_stxsi<wd>x"
3016 [(set (match_operand:QHI 0 "reg_or_indexed_operand" "=r,Z")
3017 (unspec:QHI [(match_operand:DI 1 "vsx_register_operand" "wi,wi")]
3018 UNSPEC_P9_MEMORY))]
3019 "TARGET_P9_VECTOR"
3020 "@
3021 mfvsrd %0,%x1
3022 stxsi<wd>x %x1,%y0"
3023 [(set_attr "type" "mffgpr,fpstore")])
3024
3025 ;; ISA 3.0 Binary Floating-Point Support
3026
3027 ;; VSX Scalar Extract Exponent Double-Precision
3028 (define_insn "xsxexpdp"
3029 [(set (match_operand:DI 0 "register_operand" "=r")
3030 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
3031 UNSPEC_VSX_SXEXPDP))]
3032 "TARGET_P9_VECTOR && TARGET_64BIT"
3033 "xsxexpdp %0,%x1"
3034 [(set_attr "type" "integer")])
3035
3036 ;; VSX Scalar Extract Significand Double-Precision
3037 (define_insn "xsxsigdp"
3038 [(set (match_operand:DI 0 "register_operand" "=r")
3039 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
3040 UNSPEC_VSX_SXSIGDP))]
3041 "TARGET_P9_VECTOR && TARGET_64BIT"
3042 "xsxsigdp %0,%x1"
3043 [(set_attr "type" "integer")])
3044
3045 ;; VSX Scalar Insert Exponent Double-Precision
3046 (define_insn "xsiexpdp"
3047 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
3048 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
3049 (match_operand:DI 2 "register_operand" "r")]
3050 UNSPEC_VSX_SIEXPDP))]
3051 "TARGET_P9_VECTOR && TARGET_64BIT"
3052 "xsiexpdp %x0,%1,%2"
3053 [(set_attr "type" "fpsimple")])
3054
3055 ;; VSX Scalar Compare Exponents Double-Precision
3056 (define_expand "xscmpexpdp_<code>"
3057 [(set (match_dup 3)
3058 (compare:CCFP
3059 (unspec:DF
3060 [(match_operand:DF 1 "vsx_register_operand" "wa")
3061 (match_operand:DF 2 "vsx_register_operand" "wa")]
3062 UNSPEC_VSX_SCMPEXPDP)
3063 (const_int 0)))
3064 (set (match_operand:SI 0 "register_operand" "=r")
3065 (CMP_TEST:SI (match_dup 3)
3066 (const_int 0)))]
3067 "TARGET_P9_VECTOR"
3068 {
3069 operands[3] = gen_reg_rtx (CCFPmode);
3070 })
3071
3072 (define_insn "*xscmpexpdp"
3073 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
3074 (compare:CCFP
3075 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
3076 (match_operand:DF 2 "vsx_register_operand" "wa")]
3077 UNSPEC_VSX_SCMPEXPDP)
3078 (match_operand:SI 3 "zero_constant" "j")))]
3079 "TARGET_P9_VECTOR"
3080 "xscmpexpdp %0,%x1,%x2"
3081 [(set_attr "type" "fpcompare")])
3082
3083 ;; VSX Scalar Test Data Class Double- and Single-Precision
3084 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
3085 ;; if any of the conditions tested by operand 2 are satisfied.
3086 ;; The gt and unordered bits are cleared to zero.)
3087 (define_expand "xststdc<Fvsx>"
3088 [(set (match_dup 3)
3089 (compare:CCFP
3090 (unspec:SFDF
3091 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3092 (match_operand:SI 2 "u7bit_cint_operand" "n")]
3093 UNSPEC_VSX_STSTDC)
3094 (match_dup 4)))
3095 (set (match_operand:SI 0 "register_operand" "=r")
3096 (eq:SI (match_dup 3)
3097 (const_int 0)))]
3098 "TARGET_P9_VECTOR"
3099 {
3100 operands[3] = gen_reg_rtx (CCFPmode);
3101 operands[4] = CONST0_RTX (SImode);
3102 })
3103
3104 ;; The VSX Scalar Test Data Class Double- and Single-Precision
3105 ;; instruction may also be used to test for negative value.
3106 (define_expand "xststdcneg<Fvsx>"
3107 [(set (match_dup 2)
3108 (compare:CCFP
3109 (unspec:SFDF
3110 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3111 (const_int 0)]
3112 UNSPEC_VSX_STSTDC)
3113 (match_dup 3)))
3114 (set (match_operand:SI 0 "register_operand" "=r")
3115 (lt:SI (match_dup 2)
3116 (const_int 0)))]
3117 "TARGET_P9_VECTOR"
3118 {
3119 operands[2] = gen_reg_rtx (CCFPmode);
3120 operands[3] = CONST0_RTX (SImode);
3121 })
3122
3123 (define_insn "*xststdc<Fvsx>"
3124 [(set (match_operand:CCFP 0 "" "=y")
3125 (compare:CCFP
3126 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
3127 (match_operand:SI 2 "u7bit_cint_operand" "n")]
3128 UNSPEC_VSX_STSTDC)
3129 (match_operand:SI 3 "zero_constant" "j")))]
3130 "TARGET_P9_VECTOR"
3131 "xststdc<Fvsx> %0,%x1,%2"
3132 [(set_attr "type" "fpcompare")])
3133
3134 ;; VSX Vector Extract Exponent Double and Single Precision
3135 (define_insn "xvxexp<VSs>"
3136 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3137 (unspec:VSX_F
3138 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
3139 UNSPEC_VSX_VXEXP))]
3140 "TARGET_P9_VECTOR"
3141 "xvxexp<VSs> %x0,%x1"
3142 [(set_attr "type" "vecsimple")])
3143
3144 ;; VSX Vector Extract Significand Double and Single Precision
3145 (define_insn "xvxsig<VSs>"
3146 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3147 (unspec:VSX_F
3148 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
3149 UNSPEC_VSX_VXSIG))]
3150 "TARGET_P9_VECTOR"
3151 "xvxsig<VSs> %x0,%x1"
3152 [(set_attr "type" "vecsimple")])
3153
3154 ;; VSX Vector Insert Exponent Double and Single Precision
3155 (define_insn "xviexp<VSs>"
3156 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
3157 (unspec:VSX_F
3158 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
3159 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
3160 UNSPEC_VSX_VIEXP))]
3161 "TARGET_P9_VECTOR"
3162 "xviexp<VSs> %x0,%x1,%x2"
3163 [(set_attr "type" "vecsimple")])
3164
3165 ;; VSX Vector Test Data Class Double and Single Precision
3166 ;; The corresponding elements of the result vector are all ones
3167 ;; if any of the conditions tested by operand 3 are satisfied.
3168 (define_insn "xvtstdc<VSs>"
3169 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
3170 (unspec:<VSI>
3171 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
3172 (match_operand:SI 2 "u7bit_cint_operand" "n")]
3173 UNSPEC_VSX_VTSTDC))]
3174 "TARGET_P9_VECTOR"
3175 "xvtstdc<VSs> %x0,%x1,%2"
3176 [(set_attr "type" "vecsimple")])
This page took 0.202301 seconds and 5 git commands to generate.