1 ;; VSX patterns.
2 ;; Copyright (C) 2009-2023 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
4
5 ;; This file is part of GCC.
6
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
11
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
16
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
23
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
26
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
29
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
32
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
37 TI
38 V1TI])
39
40 ;; Iterator for the 2 32-bit vector types
41 (define_mode_iterator VSX_W [V4SF V4SI])
42
43 ;; Iterator for the DF types
44 (define_mode_iterator VSX_DF [V2DF DF])
45
46 ;; Iterator for vector floating point types supported by VSX
47 (define_mode_iterator VSX_F [V4SF V2DF])
48
49 ;; Iterator for logical types supported by VSX
50 (define_mode_iterator VSX_L [V16QI
51 V8HI
52 V4SI
53 V2DI
54 V4SF
55 V2DF
56 V1TI
57 TI
58 (KF "FLOAT128_VECTOR_P (KFmode)")
59 (TF "FLOAT128_VECTOR_P (TFmode)")])
60
61 ;; Iterator for memory moves.
62 (define_mode_iterator VSX_M [V16QI
63 V8HI
64 V4SI
65 V2DI
66 V4SF
67 V2DF
68 V1TI
69 (KF "FLOAT128_VECTOR_P (KFmode)")
70 (TF "FLOAT128_VECTOR_P (TFmode)")
71 TI])
72
73 (define_mode_attr VSX_XXBR [(V8HI "h")
74 (V4SI "w")
75 (V4SF "w")
76 (V2DF "d")
77 (V2DI "d")
78 (V1TI "q")])
79
80 ;; Map into the appropriate load/store name based on the type
81 (define_mode_attr VSm [(V16QI "vw4")
82 (V8HI "vw4")
83 (V4SI "vw4")
84 (V4SF "vw4")
85 (V2DF "vd2")
86 (V2DI "vd2")
87 (DF "d")
88 (TF "vd2")
89 (KF "vd2")
90 (V1TI "vd2")
91 (TI "vd2")])
92
93 ;; Map the register class used
94 (define_mode_attr VSr [(V16QI "v")
95 (V8HI "v")
96 (V4SI "v")
97 (V4SF "wa")
98 (V2DI "wa")
99 (V2DF "wa")
100 (DI "wa")
101 (DF "wa")
102 (SF "wa")
103 (TF "wa")
104 (KF "wa")
105 (V1TI "v")
106 (TI "wa")])
107
108 ;; What value we need in the "isa" field, to make the IEEE QP float work.
109 (define_mode_attr VSisa [(V16QI "*")
110 (V8HI "*")
111 (V4SI "*")
112 (V4SF "*")
113 (V2DI "*")
114 (V2DF "*")
115 (DI "*")
116 (DF "*")
117 (SF "*")
118 (V1TI "*")
119 (TI "*")
120 (TF "p9tf")
121 (KF "p9kf")])
122
123 ;; A mode attribute to disparage use of GPR registers, except for scalar
124 ;; integer modes.
125 (define_mode_attr ??r [(V16QI "??r")
126 (V8HI "??r")
127 (V4SI "??r")
128 (V4SF "??r")
129 (V2DI "??r")
130 (V2DF "??r")
131 (V1TI "??r")
132 (KF "??r")
133 (TF "??r")
134 (TI "r")])
135
136 ;; A mode attribute used for 128-bit constant values.
137 (define_mode_attr nW [(V16QI "W")
138 (V8HI "W")
139 (V4SI "W")
140 (V4SF "W")
141 (V2DI "W")
142 (V2DF "W")
143 (V1TI "W")
144 (KF "W")
145 (TF "W")
146 (TI "n")])
147
148 ;; Same size integer type for floating point data
149 (define_mode_attr VSi [(V4SF "v4si")
150 (V2DF "v2di")
151 (DF "di")])
152
153 (define_mode_attr VSI [(V4SF "V4SI")
154 (V2DF "V2DI")
155 (DF "DI")])
156
157 ;; Word size for same size conversion
158 (define_mode_attr VSc [(V4SF "w")
159 (V2DF "d")
160 (DF "d")])
161
162 ;; Map into either s or v, depending on whether this is a scalar or vector
163 ;; operation
164 (define_mode_attr VSv [(V16QI "v")
165 (V8HI "v")
166 (V4SI "v")
167 (V4SF "v")
168 (V2DI "v")
169 (V2DF "v")
170 (V1TI "v")
171 (DF "s")
172 (KF "v")])
173
174 ;; Appropriate type for add ops (and other simple FP ops)
175 (define_mode_attr VStype_simple [(V2DF "vecdouble")
176 (V4SF "vecfloat")
177 (DF "fp")])
178
179 ;; Appropriate type for multiply ops
180 (define_mode_attr VStype_mul [(V2DF "vecdouble")
181 (V4SF "vecfloat")
182 (DF "dmul")])
183
184 ;; Appropriate type for divide ops.
185 (define_mode_attr VStype_div [(V2DF "vecdiv")
186 (V4SF "vecfdiv")
187 (DF "ddiv")])
188
189 ;; Map to a double-sized vector mode
190 (define_mode_attr VS_double [(V4SI "V8SI")
191 (V4SF "V8SF")
192 (V2DI "V4DI")
193 (V2DF "V4DF")
194 (V1TI "V2TI")])
195
196 ;; Iterators for loading constants with xxspltib
197 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
198 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
199
200 ;; Vector reverse byte modes
201 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
202
203 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
204 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
205 ;; done on ISA 2.07 and not just ISA 3.0.
206 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
207 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
208 (define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])
209
210 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
211 (V8HI "h")
212 (V4SI "w")])
213
214 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
215 ;; insert to validate the operand number.
216 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
217 (V8HI "const_0_to_7_operand")
218 (V4SI "const_0_to_3_operand")])
219
220 ;; Mode attribute to give the constraint for vector extract and insert
221 ;; operations.
222 (define_mode_attr VSX_EX [(V16QI "v")
223 (V8HI "v")
224 (V4SI "wa")])
225
226 ;; Mode iterator for binary floating types other than double, used to
227 ;; optimize converting to that floating point type from an extract
228 ;; of an integer type.
229 (define_mode_iterator VSX_EXTRACT_FL [SF
230 (IF "FLOAT128_2REG_P (IFmode)")
231 (KF "TARGET_FLOAT128_HW")
232 (TF "FLOAT128_2REG_P (TFmode)
233 || (FLOAT128_IEEE_P (TFmode)
234 && TARGET_FLOAT128_HW)")])
235
236 ;; Mode iterator for binary floating types that have a direct conversion
237 ;; from 64-bit integer to floating point
238 (define_mode_iterator FL_CONV [SF
239 DF
240 (KF "TARGET_FLOAT128_HW")
241 (TF "TARGET_FLOAT128_HW
242 && FLOAT128_IEEE_P (TFmode)")])
243
244 ;; Iterator for the 2 short vector types to do a splat from an integer
245 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
246
247 ;; Mode attribute to give the count for the splat instruction to splat
248 ;; the value in the 64-bit integer slot
249 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
250
251 ;; Mode attribute to give the suffix for the splat instruction
252 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
253
254 ;; Iterator for the move to mask instructions
255 (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
256 (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
257
258 ;; Longer vec int modes for rotate/mask ops
259 ;; and Vector Integer Multiply/Divide/Modulo Instructions
260 (define_mode_iterator VIlong [V2DI V4SI])
261
262 ;; Constants for creating unspecs
263 (define_c_enum "unspec"
264 [UNSPEC_VSX_CONCAT
265 UNSPEC_VSX_CVDPSXWS
266 UNSPEC_VSX_CVDPUXWS
267 UNSPEC_VSX_CVSPDP
268 UNSPEC_VSX_CVHPSP
269 UNSPEC_VSX_CVSPDPN
270 UNSPEC_VSX_CVDPSPN
271 UNSPEC_VSX_CVSXWDP
272 UNSPEC_VSX_CVUXWDP
273 UNSPEC_VSX_CVSXDSP
274 UNSPEC_VSX_CVUXDSP
275 UNSPEC_VSX_FLOAT2
276 UNSPEC_VSX_UNS_FLOAT2
277 UNSPEC_VSX_FLOATE
278 UNSPEC_VSX_UNS_FLOATE
279 UNSPEC_VSX_FLOATO
280 UNSPEC_VSX_UNS_FLOATO
281 UNSPEC_VSX_TDIV
282 UNSPEC_VSX_TSQRT
283 UNSPEC_VSX_SET
284 UNSPEC_VSX_ROUND_I
285 UNSPEC_VSX_ROUND_IC
286 UNSPEC_VSX_SLDWI
287 UNSPEC_VSX_XXPERM
288
289 UNSPEC_VSX_XXSPLTW
290 UNSPEC_VSX_XXSPLTD
291 UNSPEC_VSX_DIVSD
292 UNSPEC_VSX_DIVUD
293 UNSPEC_VSX_DIVSQ
294 UNSPEC_VSX_DIVUQ
295 UNSPEC_VSX_DIVESQ
296 UNSPEC_VSX_DIVEUQ
297 UNSPEC_VSX_MODSQ
298 UNSPEC_VSX_MODUQ
299 UNSPEC_VSX_MULSD
300 UNSPEC_VSX_SIGN_EXTEND
301 UNSPEC_VSX_XVCVBF16SPN
302 UNSPEC_VSX_XVCVSPBF16
303 UNSPEC_VSX_XVCVSPSXDS
304 UNSPEC_VSX_XVCVSPHP
305 UNSPEC_VSX_VSLO
306 UNSPEC_VSX_EXTRACT
307 UNSPEC_VSX_SXEXPDP
308 UNSPEC_VSX_SXSIG
309 UNSPEC_VSX_SIEXPDP
310 UNSPEC_VSX_SIEXPQP
311 UNSPEC_VSX_SCMPEXPDP
312 UNSPEC_VSX_SCMPEXPQP
313 UNSPEC_VSX_STSTDC
314 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
315 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
316 UNSPEC_VSX_VXEXP
317 UNSPEC_VSX_VXSIG
318 UNSPEC_VSX_VIEXP
319 UNSPEC_VSX_VTSTDC
320 UNSPEC_VSX_VSIGNED2
321
322 UNSPEC_LXVL
323 UNSPEC_LXVLL
324 UNSPEC_LVSL_REG
325 UNSPEC_LVSR_REG
326 UNSPEC_STXVL
327 UNSPEC_STXVLL
328 UNSPEC_XL_LEN_R
329 UNSPEC_XST_LEN_R
330
331 UNSPEC_VCLZLSBB
332 UNSPEC_VCTZLSBB
333 UNSPEC_VEXTUBLX
334 UNSPEC_VEXTUHLX
335 UNSPEC_VEXTUWLX
336 UNSPEC_VEXTUBRX
337 UNSPEC_VEXTUHRX
338 UNSPEC_VEXTUWRX
339 UNSPEC_VCMPNEB
340 UNSPEC_VCMPNEZB
341 UNSPEC_VCMPNEH
342 UNSPEC_VCMPNEZH
343 UNSPEC_VCMPNEW
344 UNSPEC_VCMPNEZW
345 UNSPEC_XXEXTRACTUW
346 UNSPEC_XXINSERTW
347 UNSPEC_VSX_FIRST_MATCH_INDEX
348 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
349 UNSPEC_VSX_FIRST_MISMATCH_INDEX
350 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
351 UNSPEC_XXGENPCV
352 UNSPEC_MTVSBM
353 UNSPEC_EXTENDDITI2
354 UNSPEC_VCNTMB
355 UNSPEC_VEXPAND
356 UNSPEC_VEXTRACT
357 UNSPEC_EXTRACTL
358 UNSPEC_EXTRACTR
359 UNSPEC_INSERTL
360 UNSPEC_INSERTR
361 UNSPEC_REPLACE_ELT
362 UNSPEC_REPLACE_UN
363 UNSPEC_VDIVES
364 UNSPEC_VDIVEU
365 UNSPEC_VMSUMCUD
366 UNSPEC_XXEVAL
367 UNSPEC_XXSPLTIW
368 UNSPEC_XXSPLTIDP
369 UNSPEC_XXSPLTI32DX
370 UNSPEC_XXBLEND
371 UNSPEC_XXPERMX
372 ])
373
374 (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
375 UNSPEC_VSX_XVCVBF16SPN])
376
377 (define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
378 (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])
379
380 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
381 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
382
383 ;; Vector replace_elt iterator/attr for 32-bit and 64-bit elements
384 (define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
385 (define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
386 (V2DI "d") (V2DF "d")])
387 (define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
388 (V2DI "3") (V2DF "3")])
389 (define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
390 (V2DI "8") (V2DF "8")])
391
392 ;; Like VM2 in altivec.md, just do char, short, int, long, float and double
393 (define_mode_iterator VM3 [V4SI
394 V8HI
395 V16QI
396 V4SF
397 V2DF
398 V2DI])
399
400 (define_mode_attr VM3_char [(V2DI "d")
401 (V4SI "w")
402 (V8HI "h")
403 (V16QI "b")
404 (V2DF "d")
405 (V4SF "w")])
406
407
408 ;; VSX moves
409
410 ;; The patterns for LE permuted loads and stores come before the general
411 ;; VSX moves so they match first.
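;;
;; As an illustrative sketch (register numbers are made up), a little-endian
;; V2DF load on a pre-POWER9 VSX target is expected to split into an
;; element-reversing load plus a permute that restores element order:
;;
;;	lxvd2x   vs0,0,r3	# doublewords arrive swapped on LE
;;	xxpermdi vs0,vs0,vs0,2	# swap them back into element order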
412 (define_insn_and_split "*vsx_le_perm_load_<mode>"
413 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
414 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
415 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
416 "#"
417 "&& 1"
418 [(set (match_dup 2)
419 (vec_select:<MODE>
420 (match_dup 1)
421 (parallel [(const_int 1) (const_int 0)])))
422 (set (match_dup 0)
423 (vec_select:<MODE>
424 (match_dup 2)
425 (parallel [(const_int 1) (const_int 0)])))]
426 {
427 rtx mem = operands[1];
428
429 /* Don't apply the swap optimization if we've already performed register
430 allocation and the hard register destination is not in the altivec
431 range. */
432 if ((MEM_ALIGN (mem) >= 128)
433 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
434 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
435 {
436 rtx mem_address = XEXP (mem, 0);
437 enum machine_mode mode = GET_MODE (mem);
438
439 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
440 {
441 /* Replace the source memory address with masked address. */
442 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
443 emit_insn (lvx_set_expr);
444 DONE;
445 }
446 else if (rs6000_quadword_masked_address_p (mem_address))
447 {
448 /* This rtl is already in the form that matches lvx
449 instruction, so leave it alone. */
450 DONE;
451 }
452 /* Otherwise, fall through to transform into a swapping load. */
453 }
454 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
455 : operands[0];
456 }
457 [(set_attr "type" "vecload")
458 (set_attr "length" "8")])
459
460 (define_insn_and_split "*vsx_le_perm_load_<mode>"
461 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
462 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
463 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
464 "#"
465 "&& 1"
466 [(set (match_dup 2)
467 (vec_select:<MODE>
468 (match_dup 1)
469 (parallel [(const_int 2) (const_int 3)
470 (const_int 0) (const_int 1)])))
471 (set (match_dup 0)
472 (vec_select:<MODE>
473 (match_dup 2)
474 (parallel [(const_int 2) (const_int 3)
475 (const_int 0) (const_int 1)])))]
476 {
477 rtx mem = operands[1];
478
479 /* Don't apply the swap optimization if we've already performed register
480 allocation and the hard register destination is not in the altivec
481 range. */
482 if ((MEM_ALIGN (mem) >= 128)
483 && (!HARD_REGISTER_P (operands[0])
484 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
485 {
486 rtx mem_address = XEXP (mem, 0);
487 enum machine_mode mode = GET_MODE (mem);
488
489 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
490 {
491 /* Replace the source memory address with masked address. */
492 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
493 emit_insn (lvx_set_expr);
494 DONE;
495 }
496 else if (rs6000_quadword_masked_address_p (mem_address))
497 {
498 /* This rtl is already in the form that matches lvx
499 instruction, so leave it alone. */
500 DONE;
501 }
502 /* Otherwise, fall through to transform into a swapping load. */
503 }
504 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
505 : operands[0];
506 }
507 [(set_attr "type" "vecload")
508 (set_attr "length" "8")])
509
510 (define_insn_and_split "*vsx_le_perm_load_v8hi"
511 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
512 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
513 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
514 "#"
515 "&& 1"
516 [(set (match_dup 2)
517 (vec_select:V8HI
518 (match_dup 1)
519 (parallel [(const_int 4) (const_int 5)
520 (const_int 6) (const_int 7)
521 (const_int 0) (const_int 1)
522 (const_int 2) (const_int 3)])))
523 (set (match_dup 0)
524 (vec_select:V8HI
525 (match_dup 2)
526 (parallel [(const_int 4) (const_int 5)
527 (const_int 6) (const_int 7)
528 (const_int 0) (const_int 1)
529 (const_int 2) (const_int 3)])))]
530 {
531 rtx mem = operands[1];
532
533 /* Don't apply the swap optimization if we've already performed register
534 allocation and the hard register destination is not in the altivec
535 range. */
536 if ((MEM_ALIGN (mem) >= 128)
537 && (!HARD_REGISTER_P (operands[0])
538 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
539 {
540 rtx mem_address = XEXP (mem, 0);
541 enum machine_mode mode = GET_MODE (mem);
542
543 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
544 {
545 /* Replace the source memory address with masked address. */
546 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
547 emit_insn (lvx_set_expr);
548 DONE;
549 }
550 else if (rs6000_quadword_masked_address_p (mem_address))
551 {
552 /* This rtl is already in the form that matches lvx
553 instruction, so leave it alone. */
554 DONE;
555 }
556 /* Otherwise, fall through to transform into a swapping load. */
557 }
558 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
559 : operands[0];
560 }
561 [(set_attr "type" "vecload")
562 (set_attr "length" "8")])
563
564 (define_insn_and_split "*vsx_le_perm_load_v16qi"
565 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
566 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
567 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
568 "#"
569 "&& 1"
570 [(set (match_dup 2)
571 (vec_select:V16QI
572 (match_dup 1)
573 (parallel [(const_int 8) (const_int 9)
574 (const_int 10) (const_int 11)
575 (const_int 12) (const_int 13)
576 (const_int 14) (const_int 15)
577 (const_int 0) (const_int 1)
578 (const_int 2) (const_int 3)
579 (const_int 4) (const_int 5)
580 (const_int 6) (const_int 7)])))
581 (set (match_dup 0)
582 (vec_select:V16QI
583 (match_dup 2)
584 (parallel [(const_int 8) (const_int 9)
585 (const_int 10) (const_int 11)
586 (const_int 12) (const_int 13)
587 (const_int 14) (const_int 15)
588 (const_int 0) (const_int 1)
589 (const_int 2) (const_int 3)
590 (const_int 4) (const_int 5)
591 (const_int 6) (const_int 7)])))]
592 {
593 rtx mem = operands[1];
594
595 /* Don't apply the swap optimization if we've already performed register
596 allocation and the hard register destination is not in the altivec
597 range. */
598 if ((MEM_ALIGN (mem) >= 128)
599 && (!HARD_REGISTER_P (operands[0])
600 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
601 {
602 rtx mem_address = XEXP (mem, 0);
603 enum machine_mode mode = GET_MODE (mem);
604
605 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
606 {
607 /* Replace the source memory address with masked address. */
608 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
609 emit_insn (lvx_set_expr);
610 DONE;
611 }
612 else if (rs6000_quadword_masked_address_p (mem_address))
613 {
614 /* This rtl is already in the form that matches lvx
615 instruction, so leave it alone. */
616 DONE;
617 }
618 /* Otherwise, fall through to transform into a swapping load. */
619 }
620 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
621 : operands[0];
622 }
623 [(set_attr "type" "vecload")
624 (set_attr "length" "8")])
625
626 (define_insn "*vsx_le_perm_store_<mode>"
627 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
628 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
629 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
630 "#"
631 [(set_attr "type" "vecstore")
632 (set_attr "length" "12")])
633
634 (define_split
635 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
636 (match_operand:VSX_D 1 "vsx_register_operand"))]
637 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
638 [(set (match_dup 2)
639 (vec_select:<MODE>
640 (match_dup 1)
641 (parallel [(const_int 1) (const_int 0)])))
642 (set (match_dup 0)
643 (vec_select:<MODE>
644 (match_dup 2)
645 (parallel [(const_int 1) (const_int 0)])))]
646 {
647 rtx mem = operands[0];
648
649 /* Don't apply the swap optimization if we've already performed register
650 allocation and the hard register source is not in the altivec range. */
651 if ((MEM_ALIGN (mem) >= 128)
652 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
653 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
654 {
655 rtx mem_address = XEXP (mem, 0);
656 enum machine_mode mode = GET_MODE (mem);
657 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
658 {
659 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
660 emit_insn (stvx_set_expr);
661 DONE;
662 }
663 else if (rs6000_quadword_masked_address_p (mem_address))
664 {
665 /* This rtl is already in the form that matches stvx instruction,
666 so leave it alone. */
667 DONE;
668 }
669 /* Otherwise, fall through to transform into a swapping store. */
670 }
671
672 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
673 : operands[1];
674 })
675
676 ;; The post-reload split requires that we re-permute the source
677 ;; register in case it is still live.
678 (define_split
679 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
680 (match_operand:VSX_D 1 "vsx_register_operand"))]
681 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
682 [(set (match_dup 1)
683 (vec_select:<MODE>
684 (match_dup 1)
685 (parallel [(const_int 1) (const_int 0)])))
686 (set (match_dup 0)
687 (vec_select:<MODE>
688 (match_dup 1)
689 (parallel [(const_int 1) (const_int 0)])))
690 (set (match_dup 1)
691 (vec_select:<MODE>
692 (match_dup 1)
693 (parallel [(const_int 1) (const_int 0)])))]
694 "")
695
696 (define_insn "*vsx_le_perm_store_<mode>"
697 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
698 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
699 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
700 "#"
701 [(set_attr "type" "vecstore")
702 (set_attr "length" "12")])
703
704 (define_split
705 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
706 (match_operand:VSX_W 1 "vsx_register_operand"))]
707 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
708 [(set (match_dup 2)
709 (vec_select:<MODE>
710 (match_dup 1)
711 (parallel [(const_int 2) (const_int 3)
712 (const_int 0) (const_int 1)])))
713 (set (match_dup 0)
714 (vec_select:<MODE>
715 (match_dup 2)
716 (parallel [(const_int 2) (const_int 3)
717 (const_int 0) (const_int 1)])))]
718 {
719 rtx mem = operands[0];
720
721 /* Don't apply the swap optimization if we've already performed register
722 allocation and the hard register source is not in the altivec range. */
723 if ((MEM_ALIGN (mem) >= 128)
724 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
725 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
726 {
727 rtx mem_address = XEXP (mem, 0);
728 enum machine_mode mode = GET_MODE (mem);
729 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
730 {
731 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
732 emit_insn (stvx_set_expr);
733 DONE;
734 }
735 else if (rs6000_quadword_masked_address_p (mem_address))
736 {
737 /* This rtl is already in the form that matches stvx instruction,
738 so leave it alone. */
739 DONE;
740 }
741 /* Otherwise, fall through to transform into a swapping store. */
742 }
743
744 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
745 : operands[1];
746 })
747
748 ;; The post-reload split requires that we re-permute the source
749 ;; register in case it is still live.
750 (define_split
751 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
752 (match_operand:VSX_W 1 "vsx_register_operand"))]
753 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
754 [(set (match_dup 1)
755 (vec_select:<MODE>
756 (match_dup 1)
757 (parallel [(const_int 2) (const_int 3)
758 (const_int 0) (const_int 1)])))
759 (set (match_dup 0)
760 (vec_select:<MODE>
761 (match_dup 1)
762 (parallel [(const_int 2) (const_int 3)
763 (const_int 0) (const_int 1)])))
764 (set (match_dup 1)
765 (vec_select:<MODE>
766 (match_dup 1)
767 (parallel [(const_int 2) (const_int 3)
768 (const_int 0) (const_int 1)])))]
769 "")
770
771 (define_insn "*vsx_le_perm_store_v8hi"
772 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
773 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
774 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
775 "#"
776 [(set_attr "type" "vecstore")
777 (set_attr "length" "12")])
778
779 (define_split
780 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
781 (match_operand:V8HI 1 "vsx_register_operand"))]
782 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
783 [(set (match_dup 2)
784 (vec_select:V8HI
785 (match_dup 1)
786 (parallel [(const_int 4) (const_int 5)
787 (const_int 6) (const_int 7)
788 (const_int 0) (const_int 1)
789 (const_int 2) (const_int 3)])))
790 (set (match_dup 0)
791 (vec_select:V8HI
792 (match_dup 2)
793 (parallel [(const_int 4) (const_int 5)
794 (const_int 6) (const_int 7)
795 (const_int 0) (const_int 1)
796 (const_int 2) (const_int 3)])))]
797 {
798 rtx mem = operands[0];
799
800 /* Don't apply the swap optimization if we've already performed register
801 allocation and the hard register source is not in the altivec range. */
802 if ((MEM_ALIGN (mem) >= 128)
803 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
804 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
805 {
806 rtx mem_address = XEXP (mem, 0);
807 enum machine_mode mode = GET_MODE (mem);
808 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
809 {
810 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
811 emit_insn (stvx_set_expr);
812 DONE;
813 }
814 else if (rs6000_quadword_masked_address_p (mem_address))
815 {
816 /* This rtl is already in the form that matches stvx instruction,
817 so leave it alone. */
818 DONE;
819 }
820 /* Otherwise, fall through to transform into a swapping store. */
821 }
822
823 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
824 : operands[1];
825 })
826
827 ;; The post-reload split requires that we re-permute the source
828 ;; register in case it is still live.
829 (define_split
830 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
831 (match_operand:V8HI 1 "vsx_register_operand"))]
832 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
833 [(set (match_dup 1)
834 (vec_select:V8HI
835 (match_dup 1)
836 (parallel [(const_int 4) (const_int 5)
837 (const_int 6) (const_int 7)
838 (const_int 0) (const_int 1)
839 (const_int 2) (const_int 3)])))
840 (set (match_dup 0)
841 (vec_select:V8HI
842 (match_dup 1)
843 (parallel [(const_int 4) (const_int 5)
844 (const_int 6) (const_int 7)
845 (const_int 0) (const_int 1)
846 (const_int 2) (const_int 3)])))
847 (set (match_dup 1)
848 (vec_select:V8HI
849 (match_dup 1)
850 (parallel [(const_int 4) (const_int 5)
851 (const_int 6) (const_int 7)
852 (const_int 0) (const_int 1)
853 (const_int 2) (const_int 3)])))]
854 "")
855
856 (define_insn "*vsx_le_perm_store_v16qi"
857 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
858 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
859 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
860 "#"
861 [(set_attr "type" "vecstore")
862 (set_attr "length" "12")])
863
864 (define_split
865 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
866 (match_operand:V16QI 1 "vsx_register_operand"))]
867 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
868 [(set (match_dup 2)
869 (vec_select:V16QI
870 (match_dup 1)
871 (parallel [(const_int 8) (const_int 9)
872 (const_int 10) (const_int 11)
873 (const_int 12) (const_int 13)
874 (const_int 14) (const_int 15)
875 (const_int 0) (const_int 1)
876 (const_int 2) (const_int 3)
877 (const_int 4) (const_int 5)
878 (const_int 6) (const_int 7)])))
879 (set (match_dup 0)
880 (vec_select:V16QI
881 (match_dup 2)
882 (parallel [(const_int 8) (const_int 9)
883 (const_int 10) (const_int 11)
884 (const_int 12) (const_int 13)
885 (const_int 14) (const_int 15)
886 (const_int 0) (const_int 1)
887 (const_int 2) (const_int 3)
888 (const_int 4) (const_int 5)
889 (const_int 6) (const_int 7)])))]
890 {
891 rtx mem = operands[0];
892
893 /* Don't apply the swap optimization if we've already performed register
894 allocation and the hard register source is not in the altivec range. */
895 if ((MEM_ALIGN (mem) >= 128)
896 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
897 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
898 {
899 rtx mem_address = XEXP (mem, 0);
900 enum machine_mode mode = GET_MODE (mem);
901 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
902 {
903 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
904 emit_insn (stvx_set_expr);
905 DONE;
906 }
907 else if (rs6000_quadword_masked_address_p (mem_address))
908 {
909 /* This rtl is already in the form that matches stvx instruction,
910 so leave it alone. */
911 DONE;
912 }
913 /* Otherwise, fall through to transform into a swapping store. */
914 }
915
916 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
917 : operands[1];
918 })
919
920 ;; The post-reload split requires that we re-permute the source
921 ;; register in case it is still live.
922 (define_split
923 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
924 (match_operand:V16QI 1 "vsx_register_operand"))]
925 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
926 [(set (match_dup 1)
927 (vec_select:V16QI
928 (match_dup 1)
929 (parallel [(const_int 8) (const_int 9)
930 (const_int 10) (const_int 11)
931 (const_int 12) (const_int 13)
932 (const_int 14) (const_int 15)
933 (const_int 0) (const_int 1)
934 (const_int 2) (const_int 3)
935 (const_int 4) (const_int 5)
936 (const_int 6) (const_int 7)])))
937 (set (match_dup 0)
938 (vec_select:V16QI
939 (match_dup 1)
940 (parallel [(const_int 8) (const_int 9)
941 (const_int 10) (const_int 11)
942 (const_int 12) (const_int 13)
943 (const_int 14) (const_int 15)
944 (const_int 0) (const_int 1)
945 (const_int 2) (const_int 3)
946 (const_int 4) (const_int 5)
947 (const_int 6) (const_int 7)])))
948 (set (match_dup 1)
949 (vec_select:V16QI
950 (match_dup 1)
951 (parallel [(const_int 8) (const_int 9)
952 (const_int 10) (const_int 11)
953 (const_int 12) (const_int 13)
954 (const_int 14) (const_int 15)
955 (const_int 0) (const_int 1)
956 (const_int 2) (const_int 3)
957 (const_int 4) (const_int 5)
958 (const_int 6) (const_int 7)])))]
959 "")
960
961 ;; Little endian word swapping for 128-bit types that are either scalars or the
962 ;; special V1TI container class, for which it is not appropriate to use
963 ;; vec_select on the type.
964 (define_insn "*vsx_le_permute_<mode>"
965 [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
966 (rotate:VEC_TI
967 (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
968 (const_int 64)))]
969 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
970 "@
971 xxpermdi %x0,%x1,%x1,2
972 lxvd2x %x0,%y1
973 stxvd2x %x1,%y0
974 mr %0,%L1\;mr %L0,%1
975 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
976 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
977 [(set_attr "length" "*,*,*,8,8,8")
978 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
979
980 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
981 [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa")
982 (rotate:VEC_TI
983 (rotate:VEC_TI
984 (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa")
985 (const_int 64))
986 (const_int 64)))]
987 "!BYTES_BIG_ENDIAN && TARGET_VSX"
988 "@
989 #
990 xxlor %x0,%x1"
991 "&& 1"
992 [(set (match_dup 0) (match_dup 1))]
993 {
994 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
995 {
996 emit_note (NOTE_INSN_DELETED);
997 DONE;
998 }
999 }
1000 [(set_attr "length" "0,4")
1001 (set_attr "type" "veclogical")])
1002
1003 (define_insn_and_split "*vsx_le_perm_load_<mode>"
1004 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
1005 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1006 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1007 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
1008 "@
1009 #
1010 #"
1011 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1012 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
1013 [(const_int 0)]
1014 {
1015 rtx tmp = (can_create_pseudo_p ()
1016 ? gen_reg_rtx_and_attrs (operands[0])
1017 : operands[0]);
1018 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1019 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1020 DONE;
1021 }
1022 [(set_attr "type" "vecload,load")
1023 (set_attr "length" "8,8")
1024 (set_attr "isa" "<VSisa>,*")])
1025
1026 (define_insn "*vsx_le_perm_store_<mode>"
1027 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1028 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
1029 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1030 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1031 "@
1032 #
1033 #"
1034 [(set_attr "type" "vecstore,store")
1035 (set_attr "length" "12,8")
1036 (set_attr "isa" "<VSisa>,*")])
1037
1038 (define_split
1039 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1040 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1041 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
1042 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1043 [(const_int 0)]
1044 {
1045 rtx tmp = (can_create_pseudo_p ()
1046 ? gen_reg_rtx_and_attrs (operands[0])
1047 : operands[0]);
1048 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1049 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1050 DONE;
1051 })
1052
1053 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1054 ;; GPR registers on a little endian system.
1055 (define_peephole2
1056 [(set (match_operand:VEC_TI 0 "int_reg_operand")
1057 (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand")
1058 (const_int 64)))
1059 (set (match_operand:VEC_TI 2 "int_reg_operand")
1060 (rotate:VEC_TI (match_dup 0)
1061 (const_int 64)))]
1062 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1063 && (rtx_equal_p (operands[0], operands[2])
1064 || peep2_reg_dead_p (2, operands[0]))"
1065 [(set (match_dup 2) (match_dup 1))])
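;; i.e. when the intermediate register is dead (or is the final destination),
;; the load-with-swap followed by the swap-into-place collapses back into a
;; plain TImode load from memory.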
1066
1067 (define_peephole2
1068 [(set (match_operand:VEC_TI 0 "int_reg_operand")
1069 (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand")
1070 (const_int 64)))
1071 (set (match_operand:VEC_TI 2 "memory_operand")
1072 (rotate:VEC_TI (match_dup 0)
1073 (const_int 64)))]
1074 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1075 && peep2_reg_dead_p (2, operands[0])"
1076 [(set (match_dup 2) (match_dup 1))])
1077
1078 ;; Peephole to catch the rotate pairs that memory-to-memory TImode transfers
1079 ;; leave in VSX registers on a little endian system. The vector types and IEEE
1080 ;; 128-bit floating point are handled by the more generic swap elimination pass.
1081 (define_peephole2
1082 [(set (match_operand:TI 0 "vsx_register_operand")
1083 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1084 (const_int 64)))
1085 (set (match_operand:TI 2 "vsx_register_operand")
1086 (rotate:TI (match_dup 0)
1087 (const_int 64)))]
1088 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1089 && (rtx_equal_p (operands[0], operands[2])
1090 || peep2_reg_dead_p (2, operands[0]))"
1091 [(set (match_dup 2) (match_dup 1))])
1092
1093 ;; The post-reload split requires that we re-permute the source
1094 ;; register in case it is still live.
1095 (define_split
1096 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1097 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1098 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
1099 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1100 [(const_int 0)]
1101 {
1102 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1103 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1104 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1105 DONE;
1106 })
1107
1108 ;; Vector constants that can be generated with XXSPLTIB, which was added in
1109 ;; ISA 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1110 (define_insn "xxspltib_v16qi"
1111 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1112 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1113 "TARGET_P9_VECTOR"
1114 {
1115 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1116 return "xxspltib %x0,%2";
1117 }
1118 [(set_attr "type" "vecperm")])
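;; e.g. (vec_duplicate:V16QI (const_int 5)) is emitted as "xxspltib %x0,5"
;; instead of a constant-pool load.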
1119
1120 (define_insn "xxspltib_<mode>_nosplit"
1121 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1122 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1123 "TARGET_P9_VECTOR"
1124 {
1125 rtx op1 = operands[1];
1126 int value = 256;
1127 int num_insns = -1;
1128
1129 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1130 || num_insns != 1)
1131 gcc_unreachable ();
1132
1133 operands[2] = GEN_INT (value & 0xff);
1134 return "xxspltib %x0,%2";
1135 }
1136 [(set_attr "type" "vecperm")])
1137
1138 (define_insn_and_split "*xxspltib_<mode>_split"
1139 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1140 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1141 "TARGET_P9_VECTOR"
1142 "#"
1143 "&& 1"
1144 [(const_int 0)]
1145 {
1146 int value = 256;
1147 int num_insns = -1;
1148 rtx op0 = operands[0];
1149 rtx op1 = operands[1];
1150 rtx tmp = ((can_create_pseudo_p ())
1151 ? gen_reg_rtx (V16QImode)
1152 : gen_lowpart (V16QImode, op0));
1153
1154 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1155 || num_insns != 2)
1156 gcc_unreachable ();
1157
1158 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1159
1160 if (<MODE>mode == V2DImode)
1161 emit_insn (gen_vsx_sign_extend_v16qi_v2di (op0, tmp));
1162
1163 else if (<MODE>mode == V4SImode)
1164 emit_insn (gen_vsx_sign_extend_v16qi_v4si (op0, tmp));
1165
1166 else if (<MODE>mode == V8HImode)
1167 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1168
1169 else
1170 gcc_unreachable ();
1171
1172 DONE;
1173 }
1174 [(set_attr "type" "vecperm")
1175 (set_attr "length" "8")])
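;;
;; Illustrative two-instruction split for a V4SI splat of the byte value 34
;; (made-up register numbers; vsx_sign_extend_v16qi_v4si emits vextsb2w,
;; the V2DI case uses vextsb2d, and V8HI uses vupkhsb):
;;
;;	xxspltib vs32,34	# splat 34 into all 16 byte lanes
;;	vextsb2w v0,v0		# sign-extend the low byte of each word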
1176
1177
1178 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
1179 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1180 ;; all 1's, since the machine does not have to wait for the previous
1181 ;; instruction using the register being set (such as a store waiting on a slow
1182 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1183
1184 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1185 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1186 ;; LXVKQ XXSPLTI*
1187 ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1188 (define_insn "vsx_mov<mode>_64bit"
1189 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1190 "=ZwO, wa, wa, r, we, ?wQ,
1191 ?&r, ??r, ??Y, <??r>, wa, v,
1192 wa, wa,
1193 ?wa, v, <??r>, wZ, v")
1194
1195 (match_operand:VSX_M 1 "input_operand"
1196 "wa, ZwO, wa, we, r, r,
1197 wQ, Y, r, r, wE, jwM,
1198 eQ, eP,
1199 ?jwM, W, <nW>, v, wZ"))]
1200
1201 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1202 && (register_operand (operands[0], <MODE>mode)
1203 || register_operand (operands[1], <MODE>mode))"
1204 {
1205 return rs6000_output_move_128bit (operands);
1206 }
1207 [(set_attr "type"
1208 "vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
1209 store, load, store, *, vecsimple, vecsimple,
1210 vecperm, vecperm,
1211 vecsimple, *, *, vecstore, vecload")
1212 (set_attr "num_insns"
1213 "*, *, *, 2, *, 2,
1214 2, 2, 2, 2, *, *,
1215 *, *,
1216 *, 5, 2, *, *")
1217 (set_attr "max_prefixed_insns"
1218 "*, *, *, *, *, 2,
1219 2, 2, 2, 2, *, *,
1220 *, *,
1221 *, *, *, *, *")
1222 (set_attr "length"
1223 "*, *, *, 8, *, 8,
1224 8, 8, 8, 8, *, *,
1225 *, *,
1226 *, 20, 8, *, *")
1227 (set_attr "isa"
1228 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1229 *, *, *, *, p9v, *,
1230 p10, p10,
1231 <VSisa>, *, *, *, *")
1232 (set_attr "prefixed"
1233 "*, *, *, *, *, *,
1234 *, *, *, *, *, *,
1235 *, yes,
1236 *, *, *, *, *")])
1237
1238 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1239 ;; LXVKQ XXSPLTI*
1240 ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
1241 ;; LVX (VMX) STVX (VMX)
1242 (define_insn "*vsx_mov<mode>_32bit"
1243 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1244 "=ZwO, wa, wa, ??r, ??Y, <??r>,
1245 wa, wa,
1246 wa, v, ?wa, v, <??r>,
1247 wZ, v")
1248
1249 (match_operand:VSX_M 1 "input_operand"
1250 "wa, ZwO, wa, Y, r, r,
1251 eQ, eP,
1252 wE, jwM, ?jwM, W, <nW>,
1253 v, wZ"))]
1254
1255 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1256 && (register_operand (operands[0], <MODE>mode)
1257 || register_operand (operands[1], <MODE>mode))"
1258 {
1259 return rs6000_output_move_128bit (operands);
1260 }
1261 [(set_attr "type"
1262 "vecstore, vecload, vecsimple, load, store, *,
1263 vecperm, vecperm,
1264 vecsimple, vecsimple, vecsimple, *, *,
1265 vecstore, vecload")
1266 (set_attr "length"
1267 "*, *, *, 16, 16, 16,
1268 *, *,
1269 *, *, *, 20, 16,
1270 *, *")
1271 (set_attr "isa"
1272 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1273 p10, p10,
1274 p9v, *, <VSisa>, *, *,
1275 *, *")
1276 (set_attr "prefixed"
1277 "*, *, *, *, *, *,
1278 *, yes,
1279 *, *, *, *, *,
1280 *, *")])
1281
1282 ;; Explicit load/store expanders for the builtin functions
1283 (define_expand "vsx_load_<mode>"
1284 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1285 (match_operand:VSX_M 1 "memory_operand"))]
1286 "VECTOR_MEM_VSX_P (<MODE>mode)"
1287 {
1288 /* Expand to swaps if needed, prior to swap optimization. */
1289 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1290 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode))
1291 {
1292 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1293 DONE;
1294 }
1295 })
1296
1297 (define_expand "vsx_store_<mode>"
1298 [(set (match_operand:VSX_M 0 "memory_operand")
1299 (match_operand:VSX_M 1 "vsx_register_operand"))]
1300 "VECTOR_MEM_VSX_P (<MODE>mode)"
1301 {
1302 /* Expand to swaps if needed, prior to swap optimization. */
1303 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1304 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode))
1305 {
1306 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1307 DONE;
1308 }
1309 })
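;;
;; A hedged C-level sketch of how these expanders are typically reached
;; (vec_vsx_ld/vec_vsx_st are the altivec.h built-ins; the exact routing can
;; differ by target and GCC version):
;;
;;	#include <altivec.h>
;;	vector double ld2 (const vector double *p) { return vec_vsx_ld (0, p); }
;;	void st2 (vector double *p, vector double v) { vec_vsx_st (v, 0, p); }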
1310
1311 ;; Load rightmost element from load_data
1312 ;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
1313 (define_insn "vsx_lxvr<wd>x"
1314 [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
1315 (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
1316 "TARGET_POWER10"
1317 "lxvr<wd>x %x0,%y1"
1318 [(set_attr "type" "vecload")])
1319
1320 ;; Store rightmost element into store_data
1321 ;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
1322 (define_insn "vsx_stxvr<wd>x"
1323 [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
1324 (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
1325 "TARGET_POWER10"
1326 "stxvr<wd>x %x1,%y0"
1327 [(set_attr "type" "vecstore")])
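;; e.g. "stxvrwx %x1,%y0" stores only the rightmost (least-significant) word
;; of the 128-bit source, matching the truncate:SI of the TImode register in
;; the pattern above.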
1328
1329 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1330 ;; when you really want their element-reversing behavior.
1331 (define_insn "vsx_ld_elemrev_v2di"
1332 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1333 (vec_select:V2DI
1334 (match_operand:V2DI 1 "memory_operand" "Z")
1335 (parallel [(const_int 1) (const_int 0)])))]
1336 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1337 "lxvd2x %x0,%y1"
1338 [(set_attr "type" "vecload")])
1339
1340 (define_insn "vsx_ld_elemrev_v1ti"
1341 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1342 (vec_select:V1TI
1343 (match_operand:V1TI 1 "memory_operand" "Z")
1344 (parallel [(const_int 0)])))]
1345 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1346 {
1347 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1348 }
1349 [(set_attr "type" "vecload")])
1350
1351 (define_insn "vsx_ld_elemrev_v2df"
1352 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1353 (vec_select:V2DF
1354 (match_operand:V2DF 1 "memory_operand" "Z")
1355 (parallel [(const_int 1) (const_int 0)])))]
1356 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1357 "lxvd2x %x0,%y1"
1358 [(set_attr "type" "vecload")])
1359
1360 (define_insn "vsx_ld_elemrev_v4si"
1361 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1362 (vec_select:V4SI
1363 (match_operand:V4SI 1 "memory_operand" "Z")
1364 (parallel [(const_int 3) (const_int 2)
1365 (const_int 1) (const_int 0)])))]
1366 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1367 "lxvw4x %x0,%y1"
1368 [(set_attr "type" "vecload")])
1369
1370 (define_insn "vsx_ld_elemrev_v4sf"
1371 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1372 (vec_select:V4SF
1373 (match_operand:V4SF 1 "memory_operand" "Z")
1374 (parallel [(const_int 3) (const_int 2)
1375 (const_int 1) (const_int 0)])))]
1376 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1377 "lxvw4x %x0,%y1"
1378 [(set_attr "type" "vecload")])
1379
1380 (define_expand "vsx_ld_elemrev_v8hi"
1381 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1382 (vec_select:V8HI
1383 (match_operand:V8HI 1 "memory_operand" "Z")
1384 (parallel [(const_int 7) (const_int 6)
1385 (const_int 5) (const_int 4)
1386 (const_int 3) (const_int 2)
1387 (const_int 1) (const_int 0)])))]
1388 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1389 {
1390 if (!TARGET_P9_VECTOR)
1391 {
1392 rtx tmp = gen_reg_rtx (V4SImode);
1393 rtx subreg, subreg2, perm[16], pcv;
1394 /* 2 is leftmost element in register.  */
1395 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1396 int i;
1397
1398 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1399 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1400 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1401
1402 for (i = 0; i < 16; ++i)
1403 perm[i] = GEN_INT (reorder[i]);
1404
1405 pcv = force_reg (V16QImode,
1406 gen_rtx_CONST_VECTOR (V16QImode,
1407 gen_rtvec_v (16, perm)));
1408 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1409 subreg2, pcv));
1410 DONE;
1411 }
1412 })
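;; In the !TARGET_P9_VECTOR fallback above, lxvw4x (via vsx_ld_elemrev_v4si)
;; reverses the four words, and the vperm control then swaps the two
;; halfwords inside each word: e.g. V8HI result element 0 is assembled from
;; bytes 13 and 12 of the word-reversed image, per the reorder[] table.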
1413
1414 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1415 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1416 (vec_select:V8HI
1417 (match_operand:V8HI 1 "memory_operand" "Z")
1418 (parallel [(const_int 7) (const_int 6)
1419 (const_int 5) (const_int 4)
1420 (const_int 3) (const_int 2)
1421 (const_int 1) (const_int 0)])))]
1422 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1423 "lxvh8x %x0,%y1"
1424 [(set_attr "type" "vecload")])
1425
1426 (define_expand "vsx_ld_elemrev_v16qi"
1427 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1428 (vec_select:V16QI
1429 (match_operand:V16QI 1 "memory_operand" "Z")
1430 (parallel [(const_int 15) (const_int 14)
1431 (const_int 13) (const_int 12)
1432 (const_int 11) (const_int 10)
1433 (const_int 9) (const_int 8)
1434 (const_int 7) (const_int 6)
1435 (const_int 5) (const_int 4)
1436 (const_int 3) (const_int 2)
1437 (const_int 1) (const_int 0)])))]
1438 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1439 {
1440 if (!TARGET_P9_VECTOR)
1441 {
1442 rtx tmp = gen_reg_rtx (V4SImode);
1443 rtx subreg, subreg2, perm[16], pcv;
1444 /* 3 is leftmost element in register.  */
1445 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1446 int i;
1447
1448 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1449 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1450 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1451
1452 for (i = 0; i < 16; ++i)
1453 perm[i] = GEN_INT (reorder[i]);
1454
1455 pcv = force_reg (V16QImode,
1456 gen_rtx_CONST_VECTOR (V16QImode,
1457 gen_rtvec_v (16, perm)));
1458 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1459 subreg2, pcv));
1460 DONE;
1461 }
1462 })
1463
1464 (define_insn "vsx_ld_elemrev_v16qi_internal"
1465 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1466 (vec_select:V16QI
1467 (match_operand:V16QI 1 "memory_operand" "Z")
1468 (parallel [(const_int 15) (const_int 14)
1469 (const_int 13) (const_int 12)
1470 (const_int 11) (const_int 10)
1471 (const_int 9) (const_int 8)
1472 (const_int 7) (const_int 6)
1473 (const_int 5) (const_int 4)
1474 (const_int 3) (const_int 2)
1475 (const_int 1) (const_int 0)])))]
1476 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1477 "lxvb16x %x0,%y1"
1478 [(set_attr "type" "vecload")])
1479
1480 (define_insn "vsx_st_elemrev_v1ti"
1481 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1482 (vec_select:V1TI
1483 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1484 (parallel [(const_int 0)])))
1485 (clobber (match_dup 1))]
1486 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1487 {
1488 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1489 }
1490 [(set_attr "type" "vecstore")])
1491
1492 (define_insn "vsx_st_elemrev_v2df"
1493 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1494 (vec_select:V2DF
1495 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1496 (parallel [(const_int 1) (const_int 0)])))]
1497 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1498 "stxvd2x %x1,%y0"
1499 [(set_attr "type" "vecstore")])
1500
1501 (define_insn "vsx_st_elemrev_v2di"
1502 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1503 (vec_select:V2DI
1504 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1505 (parallel [(const_int 1) (const_int 0)])))]
1506 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1507 "stxvd2x %x1,%y0"
1508 [(set_attr "type" "vecstore")])
1509
1510 (define_insn "vsx_st_elemrev_v4sf"
1511 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1512 (vec_select:V4SF
1513 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1514 (parallel [(const_int 3) (const_int 2)
1515 (const_int 1) (const_int 0)])))]
1516 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1517 "stxvw4x %x1,%y0"
1518 [(set_attr "type" "vecstore")])
1519
1520 (define_insn "vsx_st_elemrev_v4si"
1521 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1522 (vec_select:V4SI
1523 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1524 (parallel [(const_int 3) (const_int 2)
1525 (const_int 1) (const_int 0)])))]
1526 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1527 "stxvw4x %x1,%y0"
1528 [(set_attr "type" "vecstore")])
1529
1530 (define_expand "vsx_st_elemrev_v8hi"
1531 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1532 (vec_select:V8HI
1533 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1534 (parallel [(const_int 7) (const_int 6)
1535 (const_int 5) (const_int 4)
1536 (const_int 3) (const_int 2)
1537 (const_int 1) (const_int 0)])))]
1538 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1539 {
1540 if (!TARGET_P9_VECTOR)
1541 {
1542 rtx mem_subreg, subreg, perm[16], pcv;
1543 rtx tmp = gen_reg_rtx (V8HImode);
1544 /* 2 is leftmost element in register.  */
1545 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1546 int i;
1547
1548 for (i = 0; i < 16; ++i)
1549 perm[i] = GEN_INT (reorder[i]);
1550
1551 pcv = force_reg (V16QImode,
1552 gen_rtx_CONST_VECTOR (V16QImode,
1553 gen_rtvec_v (16, perm)));
1554 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1555 operands[1], pcv));
1556 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1557 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1558 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1559 DONE;
1560 }
1561 })
1562
1563 (define_insn "*vsx_st_elemrev_v2di_internal"
1564 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1565 (vec_select:V2DI
1566 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1567 (parallel [(const_int 1) (const_int 0)])))]
1568 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1569 "stxvd2x %x1,%y0"
1570 [(set_attr "type" "vecstore")])
1571
1572 (define_insn "*vsx_st_elemrev_v8hi_internal"
1573 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1574 (vec_select:V8HI
1575 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1576 (parallel [(const_int 7) (const_int 6)
1577 (const_int 5) (const_int 4)
1578 (const_int 3) (const_int 2)
1579 (const_int 1) (const_int 0)])))]
1580 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1581 "stxvh8x %x1,%y0"
1582 [(set_attr "type" "vecstore")])
1583
1584 (define_expand "vsx_st_elemrev_v16qi"
1585 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1586 (vec_select:V16QI
1587 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1588 (parallel [(const_int 15) (const_int 14)
1589 (const_int 13) (const_int 12)
1590 (const_int 11) (const_int 10)
1591 (const_int 9) (const_int 8)
1592 (const_int 7) (const_int 6)
1593 (const_int 5) (const_int 4)
1594 (const_int 3) (const_int 2)
1595 (const_int 1) (const_int 0)])))]
1596 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1597 {
1598 if (!TARGET_P9_VECTOR)
1599 {
1600 rtx mem_subreg, subreg, perm[16], pcv;
1601 rtx tmp = gen_reg_rtx (V16QImode);
1602 /* 3 is leftmost element in register.  */
1603 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1604 int i;
1605
1606 for (i = 0; i < 16; ++i)
1607 perm[i] = GEN_INT (reorder[i]);
1608
1609 pcv = force_reg (V16QImode,
1610 gen_rtx_CONST_VECTOR (V16QImode,
1611 gen_rtvec_v (16, perm)));
1612 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1613 operands[1], pcv));
1614 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1615 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1616 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1617 DONE;
1618 }
1619 })
1620
1621 (define_insn "*vsx_st_elemrev_v16qi_internal"
1622 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1623 (vec_select:V16QI
1624 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1625 (parallel [(const_int 15) (const_int 14)
1626 (const_int 13) (const_int 12)
1627 (const_int 11) (const_int 10)
1628 (const_int 9) (const_int 8)
1629 (const_int 7) (const_int 6)
1630 (const_int 5) (const_int 4)
1631 (const_int 3) (const_int 2)
1632 (const_int 1) (const_int 0)])))]
1633 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1634 "stxvb16x %x1,%y0"
1635 [(set_attr "type" "vecstore")])
1636
1637 \f
1638 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1639 ;; instructions are now combined with the insn for the traditional floating
1640 ;; point unit.
1641 (define_insn "*vsx_add<mode>3"
1642 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1643 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1644 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1645 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1646 "xvadd<sd>p %x0,%x1,%x2"
1647 [(set_attr "type" "<VStype_simple>")])
1648
1649 (define_insn "*vsx_sub<mode>3"
1650 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1651 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1652 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1653 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1654 "xvsub<sd>p %x0,%x1,%x2"
1655 [(set_attr "type" "<VStype_simple>")])
1656
1657 (define_insn "*vsx_mul<mode>3"
1658 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1659 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1660 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1661 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1662 "xvmul<sd>p %x0,%x1,%x2"
1663 [(set_attr "type" "<VStype_simple>")])
1664
1665 ; Emulate vector with scalar for vec_mul in V2DImode
1666 (define_insn_and_split "vsx_mul_v2di"
1667 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1668 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1669 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1670 UNSPEC_VSX_MULSD))]
1671 "VECTOR_MEM_VSX_P (V2DImode)"
1672 "#"
1673 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1674 [(const_int 0)]
1675 {
1676 rtx op0 = operands[0];
1677 rtx op1 = operands[1];
1678 rtx op2 = operands[2];
1679
1680 if (TARGET_POWER10)
1681 emit_insn (gen_mulv2di3 (op0, op1, op2) );
1682
1683 else
1684 {
1685 rtx op3 = gen_reg_rtx (DImode);
1686 rtx op4 = gen_reg_rtx (DImode);
1687 rtx op5 = gen_reg_rtx (DImode);
1688 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1689 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1690 if (TARGET_POWERPC64)
1691 emit_insn (gen_muldi3 (op5, op3, op4));
1692 else
1693 {
1694 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1695 emit_move_insn (op5, ret);
1696 }
1697 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1698 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1699 if (TARGET_POWERPC64)
1700 emit_insn (gen_muldi3 (op3, op3, op4));
1701 else
1702 {
1703 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1704 emit_move_insn (op3, ret);
1705 }
1706 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1707 }
1708 DONE;
1709 }
1710 [(set_attr "type" "mul")])
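;; As a sketch of the generated code: on power10 the splitter collapses to a
;; single vmulld; on earlier CPUs it becomes two doubleword extracts per
;; input, two mulld instructions (expand_mult handles the 32-bit case), and a
;; rebuild of the vector via vsx_concat_v2di.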
1711
1712 (define_insn "*vsx_div<mode>3"
1713 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1714 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1715 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1716 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1717 "xvdiv<sd>p %x0,%x1,%x2"
1718 [(set_attr "type" "<VStype_div>")])
1719
1720 ; Emulate vector with scalar for vec_div in V2DImode
1721 (define_insn_and_split "vsx_div_v2di"
1722 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1723 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1724 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1725 UNSPEC_VSX_DIVSD))]
1726 "VECTOR_MEM_VSX_P (V2DImode)"
1727 "#"
1728 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1729 [(const_int 0)]
1730 {
1731 rtx op0 = operands[0];
1732 rtx op1 = operands[1];
1733 rtx op2 = operands[2];
1734 rtx op3 = gen_reg_rtx (DImode);
1735 rtx op4 = gen_reg_rtx (DImode);
1736 rtx op5 = gen_reg_rtx (DImode);
1737 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1738 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1739 if (TARGET_POWERPC64)
1740 emit_insn (gen_divdi3 (op5, op3, op4));
1741 else
1742 {
1743 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1744 rtx target = emit_library_call_value (libfunc,
1745 op5, LCT_NORMAL, DImode,
1746 op3, DImode,
1747 op4, DImode);
1748 emit_move_insn (op5, target);
1749 }
1750 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1751 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1752 if (TARGET_POWERPC64)
1753 emit_insn (gen_divdi3 (op3, op3, op4));
1754 else
1755 {
1756 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1757 rtx target = emit_library_call_value (libfunc,
1758 op3, LCT_NORMAL, DImode,
1759 op3, DImode,
1760 op4, DImode);
1761 emit_move_insn (op3, target);
1762 }
1763 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1764 DONE;
1765 }
1766 [(set_attr "type" "div")])
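
;; Note for the split above: without TARGET_POWERPC64 there is no DImode
;; divide insn, so each element division goes out as a library call through
;; optab_libfunc (sdiv_optab, DImode) (i.e. libgcc's 64-bit signed divide),
;; roughly the equivalent of:
;;   result[0] = op1[0] / op2[0];
;;   result[1] = op1[1] / op2[1];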
1767
1768 (define_insn_and_split "vsx_udiv_v2di"
1769 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1770 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1771 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1772 UNSPEC_VSX_DIVUD))]
1773 "VECTOR_MEM_VSX_P (V2DImode)"
1774 "#"
1775 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1776 [(const_int 0)]
1777 {
1778 rtx op0 = operands[0];
1779 rtx op1 = operands[1];
1780 rtx op2 = operands[2];
1781
1782 if (TARGET_POWER10)
1783 emit_insn (gen_udivv2di3 (op0, op1, op2));
1784 else
1785 {
1786 rtx op3 = gen_reg_rtx (DImode);
1787 rtx op4 = gen_reg_rtx (DImode);
1788 rtx op5 = gen_reg_rtx (DImode);
1789
1790 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1791 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1792
1793 if (TARGET_POWERPC64)
1794 emit_insn (gen_udivdi3 (op5, op3, op4));
1795 else
1796 {
1797 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1798 rtx target = emit_library_call_value (libfunc,
1799 op5, LCT_NORMAL, DImode,
1800 op3, DImode,
1801 op4, DImode);
1802 emit_move_insn (op5, target);
1803 }
1804 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1805 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1806
1807 if (TARGET_POWERPC64)
1808 emit_insn (gen_udivdi3 (op3, op3, op4));
1809 else
1810 {
1811 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1812 rtx target = emit_library_call_value (libfunc,
1813 op3, LCT_NORMAL, DImode,
1814 op3, DImode,
1815 op4, DImode);
1816 emit_move_insn (op3, target);
1817 }
1818 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1819 }
1820 DONE;
1821 }
1822 [(set_attr "type" "div")])
1823
1824 ;; Vector integer signed/unsigned divide
1825 (define_insn "vsx_div_v1ti"
1826 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1827 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1828 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1829 UNSPEC_VSX_DIVSQ))]
1830 "TARGET_POWER10"
1831 "vdivsq %0,%1,%2"
1832 [(set_attr "type" "div")])
1833
1834 (define_insn "vsx_udiv_v1ti"
1835 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1836 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1837 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1838 UNSPEC_VSX_DIVUQ))]
1839 "TARGET_POWER10"
1840 "vdivuq %0,%1,%2"
1841 [(set_attr "type" "div")])
1842
1843 (define_insn "vsx_dives_v1ti"
1844 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1845 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1846 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1847 UNSPEC_VSX_DIVESQ))]
1848 "TARGET_POWER10"
1849 "vdivesq %0,%1,%2"
1850 [(set_attr "type" "div")])
1851
1852 (define_insn "vsx_diveu_v1ti"
1853 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1854 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1855 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1856 UNSPEC_VSX_DIVEUQ))]
1857 "TARGET_POWER10"
1858 "vdiveuq %0,%1,%2"
1859 [(set_attr "type" "div")])
1860
1861 (define_insn "vsx_mods_v1ti"
1862 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1863 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1864 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1865 UNSPEC_VSX_MODSQ))]
1866 "TARGET_POWER10"
1867 "vmodsq %0,%1,%2"
1868 [(set_attr "type" "div")])
1869
1870 (define_insn "vsx_modu_v1ti"
1871 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1872 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1873 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1874 UNSPEC_VSX_MODUQ))]
1875 "TARGET_POWER10"
1876 "vmoduq %0,%1,%2"
1877 [(set_attr "type" "div")])
1878
1879 ;; *tdiv* instruction returning the FG flag
1880 (define_expand "vsx_tdiv<mode>3_fg"
1881 [(set (match_dup 3)
1882 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1883 (match_operand:VSX_B 2 "vsx_register_operand")]
1884 UNSPEC_VSX_TDIV))
1885 (set (match_operand:SI 0 "gpc_reg_operand")
1886 (gt:SI (match_dup 3)
1887 (const_int 0)))]
1888 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1889 {
1890 operands[3] = gen_reg_rtx (CCFPmode);
1891 })
1892
1893 ;; *tdiv* instruction returning the FE flag
1894 (define_expand "vsx_tdiv<mode>3_fe"
1895 [(set (match_dup 3)
1896 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1897 (match_operand:VSX_B 2 "vsx_register_operand")]
1898 UNSPEC_VSX_TDIV))
1899 (set (match_operand:SI 0 "gpc_reg_operand")
1900 (eq:SI (match_dup 3)
1901 (const_int 0)))]
1902 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1903 {
1904 operands[3] = gen_reg_rtx (CCFPmode);
1905 })
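
;; In both expanders above, the comparison of the CCFP result against zero
;; extracts a single bit of the CR field written by x<VSv>tdiv<sd>p: the
;; gt test reads the FG bit and the eq test reads the FE bit, which the
;; software-divide sequences use to decide whether the fast estimate-based
;; path is numerically safe.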
1906
1907 (define_insn "*vsx_tdiv<mode>3_internal"
1908 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1909 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1910 (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1911 UNSPEC_VSX_TDIV))]
1912 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1913 "x<VSv>tdiv<sd>p %0,%x1,%x2"
1914 [(set_attr "type" "<VStype_simple>")])
1915
1916 (define_insn "vsx_fre<mode>2"
1917 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1918 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1919 UNSPEC_FRES))]
1920 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1921 "xvre<sd>p %x0,%x1"
1922 [(set_attr "type" "<VStype_simple>")])
1923
1924 (define_insn "*vsx_neg<mode>2"
1925 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1926 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1927 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1928 "xvneg<sd>p %x0,%x1"
1929 [(set_attr "type" "<VStype_simple>")])
1930
1931 (define_insn "*vsx_abs<mode>2"
1932 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1933 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1934 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1935 "xvabs<sd>p %x0,%x1"
1936 [(set_attr "type" "<VStype_simple>")])
1937
1938 (define_insn "vsx_nabs<mode>2"
1939 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1940 (neg:VSX_F
1941 (abs:VSX_F
1942 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1943 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1944 "xvnabs<sd>p %x0,%x1"
1945 [(set_attr "type" "<VStype_simple>")])
1946
1947 (define_insn "vsx_smax<mode>3"
1948 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1949 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1950 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1951 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1952 "xvmax<sd>p %x0,%x1,%x2"
1953 [(set_attr "type" "<VStype_simple>")])
1954
1955 (define_insn "*vsx_smin<mode>3"
1956 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1957 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1958 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1959 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1960 "xvmin<sd>p %x0,%x1,%x2"
1961 [(set_attr "type" "<VStype_simple>")])
1962
1963 (define_insn "*vsx_sqrt<mode>2"
1964 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1965 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1966 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1967 "xvsqrt<sd>p %x0,%x1"
1968 [(set_attr "type" "<sd>sqrt")])
1969
1970 (define_insn "*vsx_rsqrte<mode>2"
1971 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1972 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1973 UNSPEC_RSQRT))]
1974 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1975 "xvrsqrte<sd>p %x0,%x1"
1976 [(set_attr "type" "<VStype_simple>")])
1977
1978 ;; *tsqrt* returning the fg flag
1979 (define_expand "vsx_tsqrt<mode>2_fg"
1980 [(set (match_dup 2)
1981 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1982 UNSPEC_VSX_TSQRT))
1983 (set (match_operand:SI 0 "gpc_reg_operand")
1984 (gt:SI (match_dup 2)
1985 (const_int 0)))]
1986 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1987 {
1988 operands[2] = gen_reg_rtx (CCFPmode);
1989 })
1990
1991 ;; *tsqrt* returning the fe flag
1992 (define_expand "vsx_tsqrt<mode>2_fe"
1993 [(set (match_dup 2)
1994 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1995 UNSPEC_VSX_TSQRT))
1996 (set (match_operand:SI 0 "gpc_reg_operand")
1997 (eq:SI (match_dup 2)
1998 (const_int 0)))]
1999 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2000 {
2001 operands[2] = gen_reg_rtx (CCFPmode);
2002 })
2003
2004 (define_insn "*vsx_tsqrt<mode>2_internal"
2005 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
2006 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2007 UNSPEC_VSX_TSQRT))]
2008 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2009 "x<VSv>tsqrt<sd>p %0,%x1"
2010 [(set_attr "type" "<VStype_simple>")])
2011
2012 ;; Fused vector multiply/add instructions. Do not generate the Altivec versions
2013 ;; of fma (vmaddfp and vnmsubfp). These instructions allow the target to be a
2014 ;; separate register from the 3 inputs, which can possibly save an extra move
2015 ;; being generated (assuming all registers are AltiVec registers). However,
2016 ;; vmaddfp and vnmsubfp can have different behaviors than the VSX instructions
2017 ;; in some corner cases due to VSCR[NJ] being set or if the addend is +0.0
2018 ;; instead of -0.0.
2019 (define_insn "*vsx_fmav4sf4"
2020 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
2021 (fma:V4SF
2022 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
2023 (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
2024 (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
2025 "VECTOR_UNIT_VSX_P (V4SFmode)"
2026 "@
2027 xvmaddasp %x0,%x1,%x2
2028 xvmaddmsp %x0,%x1,%x3"
2029 [(set_attr "type" "vecfloat")])
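
;; The two alternatives above choose between the A-form and M-form FMA
;; encodings based on which input is tied to the output: when the addend
;; (operand 3) is tied we use xvmaddasp (vsT = vsA*vsB + vsT), and when
;; the second multiplicand (operand 2) is tied we use xvmaddmsp
;; (vsT = vsA*vsT + vsB), avoiding an extra register copy either way.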
2030
2031 (define_insn "*vsx_fmav2df4"
2032 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
2033 (fma:V2DF
2034 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
2035 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
2036 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
2037 "VECTOR_UNIT_VSX_P (V2DFmode)"
2038 "@
2039 xvmaddadp %x0,%x1,%x2
2040 xvmaddmdp %x0,%x1,%x3"
2041 [(set_attr "type" "vecdouble")])
2042
2043 (define_insn "*vsx_fms<mode>4"
2044 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
2045 (fma:VSX_F
2046 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
2047 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
2048 (neg:VSX_F
2049 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
2050 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2051 "@
2052 xvmsuba<sd>p %x0,%x1,%x2
2053 xvmsubm<sd>p %x0,%x1,%x3"
2054 [(set_attr "type" "<VStype_mul>")])
2055
2056 (define_insn "*vsx_nfma<mode>4"
2057 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
2058 (neg:VSX_F
2059 (fma:VSX_F
2060 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
2061 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
2062 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
2063 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2064 "@
2065 xvnmadda<sd>p %x0,%x1,%x2
2066 xvnmaddm<sd>p %x0,%x1,%x3"
2067 [(set_attr "type" "<VStype_mul>")])
2068
2069 (define_insn "*vsx_nfmsv4sf4"
2070 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
2071 (neg:V4SF
2072 (fma:V4SF
2073 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
2074 (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
2075 (neg:V4SF
2076 (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
2077 "VECTOR_UNIT_VSX_P (V4SFmode)"
2078 "@
2079 xvnmsubasp %x0,%x1,%x2
2080 xvnmsubmsp %x0,%x1,%x3"
2081 [(set_attr "type" "vecfloat")])
2082
2083 (define_insn "*vsx_nfmsv2df4"
2084 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
2085 (neg:V2DF
2086 (fma:V2DF
2087 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
2088 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
2089 (neg:V2DF
2090 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
2091 "VECTOR_UNIT_VSX_P (V2DFmode)"
2092 "@
2093 xvnmsubadp %x0,%x1,%x2
2094 xvnmsubmdp %x0,%x1,%x3"
2095 [(set_attr "type" "vecdouble")])
2096
2097 ;; Vector conditional expressions (no scalar version for these instructions)
2098 (define_insn "vsx_eq<mode>"
2099 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2100 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2101 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2102 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2103 "xvcmpeq<sd>p %x0,%x1,%x2"
2104 [(set_attr "type" "<VStype_simple>")])
2105
2106 (define_insn "vsx_gt<mode>"
2107 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2108 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2109 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2110 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2111 "xvcmpgt<sd>p %x0,%x1,%x2"
2112 [(set_attr "type" "<VStype_simple>")])
2113
2114 (define_insn "*vsx_ge<mode>"
2115 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2116 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2117 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2118 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2119 "xvcmpge<sd>p %x0,%x1,%x2"
2120 [(set_attr "type" "<VStype_simple>")])
2121
2122 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2123 ;; indicate a combined status
2124 (define_insn "*vsx_eq_<mode>_p"
2125 [(set (reg:CC CR6_REGNO)
2126 (unspec:CC
2127 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2128 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2129 UNSPEC_PREDICATE))
2130 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2131 (eq:VSX_F (match_dup 1)
2132 (match_dup 2)))]
2133 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2134 "xvcmpeq<sd>p. %x0,%x1,%x2"
2135 [(set_attr "type" "<VStype_simple>")])
2136
2137 (define_insn "*vsx_gt_<mode>_p"
2138 [(set (reg:CC CR6_REGNO)
2139 (unspec:CC
2140 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2141 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2142 UNSPEC_PREDICATE))
2143 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2144 (gt:VSX_F (match_dup 1)
2145 (match_dup 2)))]
2146 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2147 "xvcmpgt<sd>p. %x0,%x1,%x2"
2148 [(set_attr "type" "<VStype_simple>")])
2149
2150 ;; xvtlsbb BF,XB
2151 ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
2152 ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
2153 (define_insn "*xvtlsbb_internal"
2154 [(set (match_operand:CC 0 "cc_reg_operand" "=y")
2155 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2156 UNSPEC_XVTLSBB))]
2157 "TARGET_POWER10"
2158 "xvtlsbb %0,%x1"
2159 [(set_attr "type" "logical")])
2160
2161 ;; Vector Test Least Significant Bit by Byte
2162 ;; for the implementation of the builtin
2163 ;; __builtin_vec_test_lsbb_all_ones
2164 ;; int vec_test_lsbb_all_ones (vector unsigned char);
2165 ;; and
2166 ;; __builtin_vec_test_lsbb_all_zeros
2167 ;; int vec_test_lsbb_all_zeros (vector unsigned char);
2168 (define_expand "xvtlsbbo"
2169 [(set (match_dup 2)
2170 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2171 UNSPEC_XVTLSBB))
2172 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2173 (lt:SI (match_dup 2) (const_int 0)))]
2174 "TARGET_POWER10"
2175 {
2176 operands[2] = gen_reg_rtx (CCmode);
2177 })
2178 (define_expand "xvtlsbbz"
2179 [(set (match_dup 2)
2180 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2181 UNSPEC_XVTLSBB))
2182 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2183 (eq:SI (match_dup 2) (const_int 0)))]
2184 "TARGET_POWER10"
2185 {
2186 operands[2] = gen_reg_rtx (CCmode);
2187 })
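
;; In the two expanders above, the lt/eq comparisons read individual bits
;; of the CR field set by xvtlsbb: the LT bit is 1 when the low bit of
;; every byte is 1 (so xvtlsbbo implements vec_test_lsbb_all_ones), and
;; the EQ bit is 1 when the low bit of every byte is 0 (xvtlsbbz
;; implements vec_test_lsbb_all_zeros).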
2188
2189 (define_insn "*vsx_ge_<mode>_p"
2190 [(set (reg:CC CR6_REGNO)
2191 (unspec:CC
2192 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2193 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2194 UNSPEC_PREDICATE))
2195 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2196 (ge:VSX_F (match_dup 1)
2197 (match_dup 2)))]
2198 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2199 "xvcmpge<sd>p. %x0,%x1,%x2"
2200 [(set_attr "type" "<VStype_simple>")])
2201
2202 ;; Copy sign
2203 (define_insn "vsx_copysign<mode>3"
2204 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2205 (unspec:VSX_F
2206 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2207 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2208 UNSPEC_COPYSIGN))]
2209 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2210 "xvcpsgn<sd>p %x0,%x2,%x1"
2211 [(set_attr "type" "<VStype_simple>")])
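
;; Note the swapped source order in the template above: xvcpsgn<sd>p takes
;; the sign from its first source and the magnitude from its second, while
;; the UNSPEC_COPYSIGN convention is copysign (magnitude, sign), so
;; operand 2 supplies the sign and operand 1 the magnitude.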
2212
2213 ;; For the conversions, limit the register class for the integer value to be
2214 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2215 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2216 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2217 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2218 ;; in allowing virtual registers.
2219 (define_insn "vsx_float<VSi><mode>2"
2220 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2221 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2222 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2223 "xvcvsx<VSc><sd>p %x0,%x1"
2224 [(set_attr "type" "<VStype_simple>")])
2225
2226 (define_insn "vsx_floatuns<VSi><mode>2"
2227 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2228 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2229 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2230 "xvcvux<VSc><sd>p %x0,%x1"
2231 [(set_attr "type" "<VStype_simple>")])
2232
2233 (define_insn "vsx_fix_trunc<mode><VSi>2"
2234 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2235 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2236 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2237 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2238 [(set_attr "type" "<VStype_simple>")])
2239
2240 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2241 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2242 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2243 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2244 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2245 [(set_attr "type" "<VStype_simple>")])
2246
2247 ;; Math rounding functions
2248 (define_insn "vsx_x<VSv>r<sd>pi"
2249 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2250 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2251 UNSPEC_VSX_ROUND_I))]
2252 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2253 "x<VSv>r<sd>pi %x0,%x1"
2254 [(set_attr "type" "<VStype_simple>")])
2255
2256 (define_insn "vsx_x<VSv>r<sd>pic"
2257 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2258 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2259 UNSPEC_VSX_ROUND_IC))]
2260 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2261 "x<VSv>r<sd>pic %x0,%x1"
2262 [(set_attr "type" "<VStype_simple>")])
2263
2264 (define_insn "vsx_btrunc<mode>2"
2265 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2266 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2267 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2268 "xvr<sd>piz %x0,%x1"
2269 [(set_attr "type" "<VStype_simple>")])
2270
2271 (define_insn "*vsx_b2trunc<mode>2"
2272 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2273 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2274 UNSPEC_FRIZ))]
2275 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2276 "x<VSv>r<sd>piz %x0,%x1"
2277 [(set_attr "type" "<VStype_simple>")])
2278
2279 (define_insn "vsx_floor<mode>2"
2280 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2281 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2282 UNSPEC_FRIM))]
2283 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2284 "xvr<sd>pim %x0,%x1"
2285 [(set_attr "type" "<VStype_simple>")])
2286
2287 (define_insn "vsx_ceil<mode>2"
2288 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2289 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2290 UNSPEC_FRIP))]
2291 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2292 "xvr<sd>pip %x0,%x1"
2293 [(set_attr "type" "<VStype_simple>")])
2294
2295 \f
2296 ;; VSX convert to/from double vector
2297
2298 ;; Convert between single and double precision
2299 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2300 ;; scalar single precision instructions internally use the double format.
2301 ;; Prefer the altivec registers, since we likely will need to do a vperm.
2302 (define_insn "vsx_xscvdpsp"
2303 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2304 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2305 UNSPEC_VSX_CVSPDP))]
2306 "VECTOR_UNIT_VSX_P (DFmode)"
2307 "xscvdpsp %x0,%x1"
2308 [(set_attr "type" "fp")])
2309
2310 (define_insn "vsx_xvcvspdp_be"
2311 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2312 (float_extend:V2DF
2313 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2314 (parallel [(const_int 0) (const_int 2)]))))]
2315 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2316 "xvcvspdp %x0,%x1"
2317 [(set_attr "type" "vecdouble")])
2318
2319 (define_insn "vsx_xvcvspdp_le"
2320 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2321 (float_extend:V2DF
2322 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2323 (parallel [(const_int 1) (const_int 3)]))))]
2324 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2325 "xvcvspdp %x0,%x1"
2326 [(set_attr "type" "vecdouble")])
2327
2328 (define_expand "vsx_xvcvspdp"
2329 [(match_operand:V2DF 0 "vsx_register_operand")
2330 (match_operand:V4SF 1 "vsx_register_operand")]
2331 "VECTOR_UNIT_VSX_P (V4SFmode)"
2332 {
2333 if (BYTES_BIG_ENDIAN)
2334 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2335 else
2336 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2337 DONE;
2338 })
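
;; The _be/_le split above reflects that xvcvspdp always converts the
;; words at even positions in big endian numbering; on a little endian
;; target those register positions correspond to the elements GCC numbers
;; 1 and 3, hence the different vec_select parallels.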
2339
2340 (define_insn "vsx_xvcvdpsp"
2341 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2342 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2343 UNSPEC_VSX_CVSPDP))]
2344 "VECTOR_UNIT_VSX_P (V2DFmode)"
2345 "xvcvdpsp %x0,%x1"
2346 [(set_attr "type" "vecdouble")])
2347
2348 ;; xscvspdp, represent the scalar SF type as V4SF
2349 (define_insn "vsx_xscvspdp"
2350 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2351 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2352 UNSPEC_VSX_CVSPDP))]
2353 "VECTOR_UNIT_VSX_P (V4SFmode)"
2354 "xscvspdp %x0,%x1"
2355 [(set_attr "type" "fp")])
2356
2357 ;; Same as vsx_xscvspdp, but use SF as the type
2358 (define_insn "vsx_xscvspdp_scalar2"
2359 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2360 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2361 UNSPEC_VSX_CVSPDP))]
2362 "VECTOR_UNIT_VSX_P (V4SFmode)"
2363 "xscvspdp %x0,%x1"
2364 [(set_attr "type" "fp")])
2365
2366 ;; Generate xvcvhpsp instruction
2367 (define_insn "vsx_xvcvhpsp"
2368 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2369 (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2370 UNSPEC_VSX_CVHPSP))]
2371 "TARGET_P9_VECTOR"
2372 "xvcvhpsp %x0,%x1"
2373 [(set_attr "type" "vecfloat")])
2374
2375 ;; Generate xvcvsphp
2376 (define_insn "vsx_xvcvsphp"
2377 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2378 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2379 UNSPEC_VSX_XVCVSPHP))]
2380 "TARGET_P9_VECTOR"
2381 "xvcvsphp %x0,%x1"
2382 [(set_attr "type" "vecfloat")])
2383
2384 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2385 ;; format of scalars is actually DF.
2386 (define_insn "vsx_xscvdpsp_scalar"
2387 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2388 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2389 UNSPEC_VSX_CVSPDP))]
2390 "VECTOR_UNIT_VSX_P (V4SFmode)"
2391 "xscvdpsp %x0,%x1"
2392 [(set_attr "type" "fp")])
2393
2394 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2395 (define_insn "vsx_xscvdpspn"
2396 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2397 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2398 UNSPEC_VSX_CVDPSPN))]
2399 "TARGET_XSCVDPSPN"
2400 "xscvdpspn %x0,%x1"
2401 [(set_attr "type" "fp")])
2402
2403 (define_insn "vsx_xscvspdpn"
2404 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2405 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2406 UNSPEC_VSX_CVSPDPN))]
2407 "TARGET_XSCVSPDPN"
2408 "xscvspdpn %x0,%x1"
2409 [(set_attr "type" "fp")])
2410
2411 (define_insn "vsx_xscvdpspn_scalar"
2412 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2413 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2414 UNSPEC_VSX_CVDPSPN))]
2415 "TARGET_XSCVDPSPN"
2416 "xscvdpspn %x0,%x1"
2417 [(set_attr "type" "fp")])
2418
2419 ;; Used by direct move to move a SFmode value from GPR to VSX register
2420 (define_insn "vsx_xscvspdpn_directmove"
2421 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2422 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2423 UNSPEC_VSX_CVSPDPN))]
2424 "TARGET_XSCVSPDPN"
2425 "xscvspdpn %x0,%x1"
2426 [(set_attr "type" "fp")])
2427
2428 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2429
2430 (define_insn "vsx_xvcv<su>xwsp"
2431 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2432 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2433 "VECTOR_UNIT_VSX_P (V4SFmode)"
2434 "xvcv<su>xwsp %x0,%x1"
2435 [(set_attr "type" "vecfloat")])
2436
2437 (define_insn "vsx_xvcv<su>xddp"
2438 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2439 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2440 "VECTOR_UNIT_VSX_P (V2DFmode)"
2441 "xvcv<su>xddp %x0,%x1"
2442 [(set_attr "type" "vecdouble")])
2443
2444 (define_insn "vsx_xvcvsp<su>xws"
2445 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2446 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2447 "VECTOR_UNIT_VSX_P (V4SFmode)"
2448 "xvcvsp<su>xws %x0,%x1"
2449 [(set_attr "type" "vecfloat")])
2450
2451 (define_insn "vsx_xvcvdp<su>xds"
2452 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2453 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2454 "VECTOR_UNIT_VSX_P (V2DFmode)"
2455 "xvcvdp<su>xds %x0,%x1"
2456 [(set_attr "type" "vecdouble")])
2457
2458 (define_expand "vsx_xvcvsxddp_scale"
2459 [(match_operand:V2DF 0 "vsx_register_operand")
2460 (match_operand:V2DI 1 "vsx_register_operand")
2461 (match_operand:QI 2 "immediate_operand")]
2462 "VECTOR_UNIT_VSX_P (V2DFmode)"
2463 {
2464 rtx op0 = operands[0];
2465 rtx op1 = operands[1];
2466 int scale = INTVAL (operands[2]);
2467 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2468 if (scale != 0)
2469 rs6000_scale_v2df (op0, op0, -scale);
2470 DONE;
2471 })
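
;; The scale operand above implements the vec_ctf semantics, which return
;; (double) x * 2**-scale: after the integer-to-float conversion we
;; multiply by the power-of-2 constant 2**-scale via rs6000_scale_v2df,
;; and scale 0 needs no fixup at all.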
2472
2473 (define_expand "vsx_xvcvuxddp_scale"
2474 [(match_operand:V2DF 0 "vsx_register_operand")
2475 (match_operand:V2DI 1 "vsx_register_operand")
2476 (match_operand:QI 2 "immediate_operand")]
2477 "VECTOR_UNIT_VSX_P (V2DFmode)"
2478 {
2479 rtx op0 = operands[0];
2480 rtx op1 = operands[1];
2481 int scale = INTVAL (operands[2]);
2482 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2483 if (scale != 0)
2484 rs6000_scale_v2df (op0, op0, -scale);
2485 DONE;
2486 })
2487
2488 (define_expand "vsx_xvcvdpsxds_scale"
2489 [(match_operand:V2DI 0 "vsx_register_operand")
2490 (match_operand:V2DF 1 "vsx_register_operand")
2491 (match_operand:QI 2 "immediate_operand")]
2492 "VECTOR_UNIT_VSX_P (V2DFmode)"
2493 {
2494 rtx op0 = operands[0];
2495 rtx op1 = operands[1];
2496 rtx tmp;
2497 int scale = INTVAL (operands[2]);
2498 if (scale == 0)
2499 tmp = op1;
2500 else
2501 {
2502 tmp = gen_reg_rtx (V2DFmode);
2503 rs6000_scale_v2df (tmp, op1, scale);
2504 }
2505 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2506 DONE;
2507 })
2508
2509 ;; convert vector of 64-bit floating point numbers to vector of
2510 ;; 64-bit unsigned integer
2511 (define_expand "vsx_xvcvdpuxds_scale"
2512 [(match_operand:V2DI 0 "vsx_register_operand")
2513 (match_operand:V2DF 1 "vsx_register_operand")
2514 (match_operand:QI 2 "immediate_operand")]
2515 "VECTOR_UNIT_VSX_P (V2DFmode)"
2516 {
2517 rtx op0 = operands[0];
2518 rtx op1 = operands[1];
2519 rtx tmp;
2520 int scale = INTVAL (operands[2]);
2521 if (scale == 0)
2522 tmp = op1;
2523 else
2524 {
2525 tmp = gen_reg_rtx (V2DFmode);
2526 rs6000_scale_v2df (tmp, op1, scale);
2527 }
2528 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2529 DONE;
2530 })
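
;; Conversely, for vec_cts/vec_ctu the two expanders above pre-multiply
;; the input by 2**scale (note the positive scale passed to
;; rs6000_scale_v2df) before the float-to-integer conversion.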
2531
2532 ;; Convert from 64-bit to 32-bit types
2533 ;; Note, favor the Altivec registers since the usual use of these instructions
2534 ;; is in vector converts and we need to use the Altivec vperm instruction.
2535
2536 (define_insn "vsx_xvcvdpsxws"
2537 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2538 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2539 UNSPEC_VSX_CVDPSXWS))]
2540 "VECTOR_UNIT_VSX_P (V2DFmode)"
2541 "xvcvdpsxws %x0,%x1"
2542 [(set_attr "type" "vecdouble")])
2543
2544 (define_insn "vsx_xvcvdpuxws"
2545 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2546 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2547 UNSPEC_VSX_CVDPUXWS))]
2548 "VECTOR_UNIT_VSX_P (V2DFmode)"
2549 "xvcvdpuxws %x0,%x1"
2550 [(set_attr "type" "vecdouble")])
2551
2552 (define_insn "vsx_xvcvsxdsp"
2553 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2554 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2555 UNSPEC_VSX_CVSXDSP))]
2556 "VECTOR_UNIT_VSX_P (V2DFmode)"
2557 "xvcvsxdsp %x0,%x1"
2558 [(set_attr "type" "vecfloat")])
2559
2560 (define_insn "vsx_xvcvuxdsp"
2561 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2562 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2563 UNSPEC_VSX_CVUXDSP))]
2564 "VECTOR_UNIT_VSX_P (V2DFmode)"
2565 "xvcvuxdsp %x0,%x1"
2566 [(set_attr "type" "vecdouble")])
2567
2568 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2569 ;; 64-bit floating point numbers.
2570 (define_insn "vsx_xvcv<su>xwdp_be"
2571 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2572 (any_float:V2DF
2573 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2574 (parallel [(const_int 0) (const_int 2)]))))]
2575 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2576 "xvcv<su>xwdp %x0,%x1"
2577 [(set_attr "type" "vecdouble")])
2578
2579 (define_insn "vsx_xvcv<su>xwdp_le"
2580 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2581 (any_float:V2DF
2582 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2583 (parallel [(const_int 1) (const_int 3)]))))]
2584 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2585 "xvcv<su>xwdp %x0,%x1"
2586 [(set_attr "type" "vecdouble")])
2587
2588 (define_expand "vsx_xvcv<su>xwdp"
2589 [(match_operand:V2DF 0 "vsx_register_operand")
2590 (match_operand:V4SI 1 "vsx_register_operand")
2591 (any_float (pc))]
2592 "VECTOR_UNIT_VSX_P (V2DFmode)"
2593 {
2594 if (BYTES_BIG_ENDIAN)
2595 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2596 else
2597 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2598 DONE;
2599 })
2600
2601 (define_insn "vsx_xvcvsxwdp_df"
2602 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2603 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2604 UNSPEC_VSX_CVSXWDP))]
2605 "TARGET_VSX"
2606 "xvcvsxwdp %x0,%x1"
2607 [(set_attr "type" "vecdouble")])
2608
2609 (define_insn "vsx_xvcvuxwdp_df"
2610 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2611 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2612 UNSPEC_VSX_CVUXWDP))]
2613 "TARGET_VSX"
2614 "xvcvuxwdp %x0,%x1"
2615 [(set_attr "type" "vecdouble")])
2616
2617 ;; Convert vector of 32-bit floating point numbers to vector of
2618 ;; 64-bit signed/unsigned integers.
2619 (define_insn "vsx_xvcvsp<su>xds_be"
2620 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2621 (any_fix:V2DI
2622 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2623 (parallel [(const_int 0) (const_int 2)]))))]
2624 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2625 "xvcvsp<su>xds %x0,%x1"
2626 [(set_attr "type" "vecdouble")])
2627
2628 (define_insn "vsx_xvcvsp<su>xds_le"
2629 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2630 (any_fix:V2DI
2631 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2632 (parallel [(const_int 1) (const_int 3)]))))]
2633 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2634 "xvcvsp<su>xds %x0,%x1"
2635 [(set_attr "type" "vecdouble")])
2636
2637 (define_expand "vsx_xvcvsp<su>xds"
2638 [(match_operand:V2DI 0 "vsx_register_operand")
2639 (match_operand:V4SF 1 "vsx_register_operand")
2640 (any_fix (pc))]
2641 "VECTOR_UNIT_VSX_P (V2DFmode)"
2642 {
2643 if (BYTES_BIG_ENDIAN)
2644 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2645 else
2646 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2647 DONE;
2648 })
2649
2650 ;; Generate float2 double
2651 ;; convert two double to float
2652 (define_expand "float2_v2df"
2653 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2654 (use (match_operand:V2DF 1 "register_operand" "wa"))
2655 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2656 "VECTOR_UNIT_VSX_P (V4SFmode)"
2657 {
2658 rtx rtx_src1, rtx_src2, rtx_dst;
2659
2660 rtx_dst = operands[0];
2661 rtx_src1 = operands[1];
2662 rtx_src2 = operands[2];
2663
2664 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2665 DONE;
2666 })
2667
2668 ;; Generate float2
2669 ;; convert two long long signed ints to float
2670 (define_expand "float2_v2di"
2671 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2672 (use (match_operand:V2DI 1 "register_operand" "wa"))
2673 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2674 "VECTOR_UNIT_VSX_P (V4SFmode)"
2675 {
2676 rtx rtx_src1, rtx_src2, rtx_dst;
2677
2678 rtx_dst = operands[0];
2679 rtx_src1 = operands[1];
2680 rtx_src2 = operands[2];
2681
2682 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2683 DONE;
2684 })
2685
2686 ;; Generate uns_float2
2687 ;; convert two long long unsigned ints to float
2688 (define_expand "uns_float2_v2di"
2689 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2690 (use (match_operand:V2DI 1 "register_operand" "wa"))
2691 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2692 "VECTOR_UNIT_VSX_P (V4SFmode)"
2693 {
2694 rtx rtx_src1, rtx_src2, rtx_dst;
2695
2696 rtx_dst = operands[0];
2697 rtx_src1 = operands[1];
2698 rtx_src2 = operands[2];
2699
2700 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2701 DONE;
2702 })
2703
2704 ;; Generate floate
2705 ;; convert double or long long signed to float
2706 ;; (Only even words are valid, BE numbering)
2707 (define_expand "floate<mode>"
2708 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2709 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2710 "VECTOR_UNIT_VSX_P (V4SFmode)"
2711 {
2712 if (BYTES_BIG_ENDIAN)
2713 {
2714 /* Shift left one word to put the even words in the correct location. */
2715 rtx rtx_tmp;
2716 rtx rtx_val = GEN_INT (4);
2717
2718 rtx_tmp = gen_reg_rtx (V4SFmode);
2719 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2720 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2721 rtx_tmp, rtx_tmp, rtx_val));
2722 }
2723 else
2724 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2725
2726 DONE;
2727 })
2728
2729 ;; Generate uns_floate
2730 ;; convert long long unsigned to float
2731 ;; (Only even words are valid, BE numbering)
2732 (define_expand "unsfloatev2di"
2733 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2734 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2735 "VECTOR_UNIT_VSX_P (V4SFmode)"
2736 {
2737 if (BYTES_BIG_ENDIAN)
2738 {
2739 /* Shift left one word to put the even words in the correct location. */
2740 rtx rtx_tmp;
2741 rtx rtx_val = GEN_INT (4);
2742
2743 rtx_tmp = gen_reg_rtx (V4SFmode);
2744 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2745 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2746 rtx_tmp, rtx_tmp, rtx_val));
2747 }
2748 else
2749 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2750
2751 DONE;
2752 })
2753
2754 ;; Generate floato
2755 ;; convert double or long long signed to float
2756 ;; (Only odd words are valid, BE numbering)
2757 (define_expand "floato<mode>"
2758 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2759 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2760 "VECTOR_UNIT_VSX_P (V4SFmode)"
2761 {
2762 if (BYTES_BIG_ENDIAN)
2763 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2764 else
2765 {
2766 /* Shift left one word to put the odd words in the correct location. */
2767 rtx rtx_tmp;
2768 rtx rtx_val = GEN_INT (4);
2769
2770 rtx_tmp = gen_reg_rtx (V4SFmode);
2771 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2772 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2773 rtx_tmp, rtx_tmp, rtx_val));
2774 }
2775 DONE;
2776 })
2777
2778 ;; Generate uns_floato
2779 ;; convert long long unsigned to float
2780 ;; (Only odd words are valid, BE numbering)
2781 (define_expand "unsfloatov2di"
2782 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2783 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2784 "VECTOR_UNIT_VSX_P (V4SFmode)"
2785 {
2786 if (BYTES_BIG_ENDIAN)
2787 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2788 else
2789 {
2790 /* Shift left one word to put the odd words in the correct location. */
2791 rtx rtx_tmp;
2792 rtx rtx_val = GEN_INT (4);
2793
2794 rtx_tmp = gen_reg_rtx (V4SFmode);
2795 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2796 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2797 rtx_tmp, rtx_tmp, rtx_val));
2798 }
2799 DONE;
2800 })
2801
2802 ;; Generate vsigned2
2803 ;; convert two double float vectors to a vector of single precision ints
2804 (define_expand "vsigned2_v2df"
2805 [(match_operand:V4SI 0 "register_operand" "=wa")
2806 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2807 (match_operand:V2DF 2 "register_operand" "wa")]
2808 UNSPEC_VSX_VSIGNED2)]
2809 "TARGET_VSX"
2810 {
2811 rtx rtx_src1, rtx_src2, rtx_dst;
2812 bool signed_convert = true;
2813
2814 rtx_dst = operands[0];
2815 rtx_src1 = operands[1];
2816 rtx_src2 = operands[2];
2817
2818 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2819 DONE;
2820 })
2821
2822 ;; Generate vsignedo_v2df
2823 ;; signed double float to int convert odd word
2824 (define_expand "vsignedo_v2df"
2825 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2826 (match_operand:V2DF 1 "register_operand" "wa"))]
2827 "TARGET_VSX"
2828 {
2829 if (BYTES_BIG_ENDIAN)
2830 {
2831 rtx rtx_tmp;
2832 rtx rtx_val = GEN_INT (12);
2833 rtx_tmp = gen_reg_rtx (V4SImode);
2834
2835 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2836
2837 /* Big endian word numbering for words in operand is 0 1 2 3.
2838 Take (operand[1] operand[1]) and shift left one word:
2839 0 1 2 3 0 1 2 3 => 1 2 3 0
2840 Words 1 and 3 are now where they need to be for the result. */
2841
2842 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2843 rtx_tmp, rtx_val));
2844 }
2845 else
2846 /* Little endian word numbering for operand is 3 2 1 0.
2847 Result words 3 and 1 are where they need to be. */
2848 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2849
2850 DONE;
2851 }
2852 [(set_attr "type" "veccomplex")])
2853
2854 ;; Generate vsignede_v2df
2855 ;; signed double float to int even word
2856 (define_expand "vsignede_v2df"
2857 [(set (match_operand:V4SI 0 "register_operand" "=v")
2858 (match_operand:V2DF 1 "register_operand" "v"))]
2859 "TARGET_VSX"
2860 {
2861 if (BYTES_BIG_ENDIAN)
2862 /* Big endian word numbering for words in operand is 0 1.
2863 Result words 0 and 2 are where they need to be. */
2864 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2865
2866 else
2867 {
2868 rtx rtx_tmp;
2869 rtx rtx_val = GEN_INT (12);
2870 rtx_tmp = gen_reg_rtx (V4SImode);
2871
2872 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2873
2874 /* Little endian word numbering for operand is 3 2 1 0.
2875 Take (operand[1] operand[1]) and shift left three words:
2876 0 1 2 3 0 1 2 3 => 3 0 1 2
2877 Words 0 and 2 are now where they need to be for the result. */
2878 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2879 rtx_tmp, rtx_val));
2880 }
2881 DONE;
2882 }
2883 [(set_attr "type" "veccomplex")])
2884
2885 ;; Generate vunsigned2
2886 ;; convert two double float vectors to a vector of single precision
2887 ;; unsigned ints
2888 (define_expand "vunsigned2_v2df"
2889 [(match_operand:V4SI 0 "register_operand" "=v")
2890 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2891 (match_operand:V2DF 2 "register_operand" "v")]
2892 UNSPEC_VSX_VSIGNED2)]
2893 "TARGET_VSX"
2894 {
2895 rtx rtx_src1, rtx_src2, rtx_dst;
2896 bool signed_convert = false;
2897
2898 rtx_dst = operands[0];
2899 rtx_src1 = operands[1];
2900 rtx_src2 = operands[2];
2901
2902 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2903 DONE;
2904 })
2905
2906 ;; Generate vunsignedo_v2df
2907 ;; unsigned double float to int convert odd word
2908 (define_expand "vunsignedo_v2df"
2909 [(set (match_operand:V4SI 0 "register_operand" "=v")
2910 (match_operand:V2DF 1 "register_operand" "v"))]
2911 "TARGET_VSX"
2912 {
2913 if (BYTES_BIG_ENDIAN)
2914 {
2915 rtx rtx_tmp;
2916 rtx rtx_val = GEN_INT (12);
2917 rtx_tmp = gen_reg_rtx (V4SImode);
2918
2919 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2920
2921 /* Big endian word numbering for words in operand is 0 1 2 3.
2922 Take (operand[1] operand[1]) and shift left one word:
2923 0 1 2 3 0 1 2 3 => 1 2 3 0
2924 Words 1 and 3 are now where they need to be for the result. */
2925
2926 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2927 rtx_tmp, rtx_val));
2928 }
2929 else
2930 /* Little endian word numbering for operand is 3 2 1 0.
2931 Result words 3 and 1 are where they need to be. */
2932 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2933
2934 DONE;
2935 }
2936 [(set_attr "type" "veccomplex")])
2937
2938 ;; Generate vunsignede_v2df
2939 ;; unsigned double float to int even word
2940 (define_expand "vunsignede_v2df"
2941 [(set (match_operand:V4SI 0 "register_operand" "=v")
2942 (match_operand:V2DF 1 "register_operand" "v"))]
2943 "TARGET_VSX"
2944 {
2945 if (BYTES_BIG_ENDIAN)
2946 /* Big endian word numbering for words in operand is 0 1.
2947 Result words 0 and 2 are where they need to be. */
2948 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2949
2950 else
2951 {
2952 rtx rtx_tmp;
2953 rtx rtx_val = GEN_INT (12);
2954 rtx_tmp = gen_reg_rtx (V4SImode);
2955
2956 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2957
2958 /* Little endian word numbering for operand is 3 2 1 0.
2959 Take (operand[1] operand[1]) and shift left three words:
2960 0 1 2 3 0 1 2 3 => 3 0 1 2
2961 Words 0 and 2 are now where they need to be for the result. */
2962 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2963 rtx_tmp, rtx_val));
2964 }
2965 DONE;
2966 }
2967 [(set_attr "type" "veccomplex")])
2968
2969 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2970 ;; the xvrdpiz instruction does not truncate the value if the floating
2971 ;; point value is < LONG_MIN or > LONG_MAX.
2972 (define_insn "*vsx_float_fix_v2df2"
2973 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2974 (float:V2DF
2975 (fix:V2DI
2976 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2977 "TARGET_HARD_FLOAT
2978 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2979 && !flag_trapping_math && TARGET_FRIZ"
2980 "xvrdpiz %x0,%x1"
2981 [(set_attr "type" "vecdouble")])
2982
2983 \f
2984 ;; Permute operations
2985
2986 ;; Build a V2DF/V2DI vector from two scalars
2987 (define_insn "vsx_concat_<mode>"
2988 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2989 (vec_concat:VSX_D
2990 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")
2991 (match_operand:<VEC_base> 2 "gpc_reg_operand" "wa,b")))]
2992 "VECTOR_MEM_VSX_P (<MODE>mode)"
2993 {
2994 if (which_alternative == 0)
2995 return (BYTES_BIG_ENDIAN
2996 ? "xxpermdi %x0,%x1,%x2,0"
2997 : "xxpermdi %x0,%x2,%x1,0");
2998
2999 else if (which_alternative == 1)
3000 return (BYTES_BIG_ENDIAN
3001 ? "mtvsrdd %x0,%1,%2"
3002 : "mtvsrdd %x0,%2,%1");
3003
3004 else
3005 gcc_unreachable ();
3006 }
3007 [(set_attr "type" "vecperm,vecmove")])
3008
3009 ;; Combiner patterns to allow creating XXPERMDI's to access either double
3010 ;; word element in a vector register.
3011 (define_insn "*vsx_concat_<mode>_1"
3012 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3013 (vec_concat:VSX_D
3014 (vec_select:<VEC_base>
3015 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
3016 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
3017 (match_operand:<VEC_base> 3 "gpc_reg_operand" "wa")))]
3018 "VECTOR_MEM_VSX_P (<MODE>mode)"
3019 {
3020 HOST_WIDE_INT dword = INTVAL (operands[2]);
3021 if (BYTES_BIG_ENDIAN)
3022 {
3023 operands[4] = GEN_INT (2 * dword);
3024 return "xxpermdi %x0,%x1,%x3,%4";
3025 }
3026 else
3027 {
3028 operands[4] = GEN_INT (!dword);
3029 return "xxpermdi %x0,%x3,%x1,%4";
3030 }
3031 }
3032 [(set_attr "type" "vecperm")])
3033
3034 (define_insn "*vsx_concat_<mode>_2"
3035 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3036 (vec_concat:VSX_D
3037 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa")
3038 (vec_select:<VEC_base>
3039 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
3040 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
3041 "VECTOR_MEM_VSX_P (<MODE>mode)"
3042 {
3043 HOST_WIDE_INT dword = INTVAL (operands[3]);
3044 if (BYTES_BIG_ENDIAN)
3045 {
3046 operands[4] = GEN_INT (dword);
3047 return "xxpermdi %x0,%x1,%x2,%4";
3048 }
3049 else
3050 {
3051 operands[4] = GEN_INT (2 * !dword);
3052 return "xxpermdi %x0,%x2,%x1,%4";
3053 }
3054 }
3055 [(set_attr "type" "vecperm")])
3056
3057 (define_insn "*vsx_concat_<mode>_3"
3058 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3059 (vec_concat:VSX_D
3060 (vec_select:<VEC_base>
3061 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
3062 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
3063 (vec_select:<VEC_base>
3064 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
3065 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
3066 "VECTOR_MEM_VSX_P (<MODE>mode)"
3067 {
3068 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
3069 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
3070 if (BYTES_BIG_ENDIAN)
3071 {
3072 operands[5] = GEN_INT ((2 * dword1) + dword2);
3073 return "xxpermdi %x0,%x1,%x3,%5";
3074 }
3075 else
3076 {
3077 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
3078 return "xxpermdi %x0,%x3,%x1,%5";
3079 }
3080 }
3081 [(set_attr "type" "vecperm")])
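
;; In the three combiner patterns above the xxpermdi immediate is formed
;; as DM = 2*sel1 + sel2, where sel1 picks the doubleword taken from the
;; first source register and sel2 the doubleword from the second; for
;; little endian the selectors are inverted (!dword) and the sources
;; swapped to compensate for the reversed GCC element order.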
3082
3083 ;; Special purpose concat using xxpermdi to glue two single precision values
3084 ;; together, relying on the fact that internally scalar floats are represented
3085 ;; as doubles. This is used to initialize a V4SF vector with 4 floats.
3086 (define_insn "vsx_concat_v2sf"
3087 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
3088 (unspec:V2DF
3089 [(match_operand:SF 1 "vsx_register_operand" "wa")
3090 (match_operand:SF 2 "vsx_register_operand" "wa")]
3091 UNSPEC_VSX_CONCAT))]
3092 "VECTOR_MEM_VSX_P (V2DFmode)"
3093 {
3094 if (BYTES_BIG_ENDIAN)
3095 return "xxpermdi %x0,%x1,%x2,0";
3096 else
3097 return "xxpermdi %x0,%x2,%x1,0";
3098 }
3099 [(set_attr "type" "vecperm")])
3100
3101 ;; Concatenate 4 SImode elements into a V4SImode reg.
3102 (define_expand "vsx_init_v4si"
3103 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
3104 (use (match_operand:SI 1 "gpc_reg_operand"))
3105 (use (match_operand:SI 2 "gpc_reg_operand"))
3106 (use (match_operand:SI 3 "gpc_reg_operand"))
3107 (use (match_operand:SI 4 "gpc_reg_operand"))]
3108 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3109 {
3110 rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
3111 rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
3112 rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
3113 rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
3114 if (!BYTES_BIG_ENDIAN)
3115 {
3116 std::swap (a, b);
3117 std::swap (c, d);
3118 }
3119
3120 rtx ab = gen_reg_rtx (DImode);
3121 rtx cd = gen_reg_rtx (DImode);
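/* Each rotate-and-insert below packs two SImode halves into one
doubleword: ab = (a << 32) | (b & 0xffffffff), and likewise cd. */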
3122 emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
3123 GEN_INT (0xffffffff)));
3124 emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
3125 GEN_INT (0xffffffff)));
3126
3127 rtx abcd = gen_reg_rtx (V2DImode);
3128 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
3129 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
3130 DONE;
3131 })
3132
3133 ;; xxpermdi for little endian loads and stores. We need several of
3134 ;; these since the form of the PARALLEL differs by mode.
3135 (define_insn "*vsx_xxpermdi2_le_<mode>"
3136 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3137 (vec_select:VSX_D
3138 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3139 (parallel [(const_int 1) (const_int 0)])))]
3140 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3141 "xxpermdi %x0,%x1,%x1,2"
3142 [(set_attr "type" "vecperm")])
3143
3144 (define_insn "xxswapd_v16qi"
3145 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3146 (vec_select:V16QI
3147 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3148 (parallel [(const_int 8) (const_int 9)
3149 (const_int 10) (const_int 11)
3150 (const_int 12) (const_int 13)
3151 (const_int 14) (const_int 15)
3152 (const_int 0) (const_int 1)
3153 (const_int 2) (const_int 3)
3154 (const_int 4) (const_int 5)
3155 (const_int 6) (const_int 7)])))]
3156 "TARGET_VSX"
3157 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3158 ;; mnemonic xxpermdi instead.
3159 "xxpermdi %x0,%x1,%x1,2"
3160 [(set_attr "type" "vecperm")])
3161
3162 (define_insn "xxswapd_v8hi"
3163 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3164 (vec_select:V8HI
3165 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3166 (parallel [(const_int 4) (const_int 5)
3167 (const_int 6) (const_int 7)
3168 (const_int 0) (const_int 1)
3169 (const_int 2) (const_int 3)])))]
3170 "TARGET_VSX"
3171 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3172 ;; mnemonic xxpermdi instead.
3173 "xxpermdi %x0,%x1,%x1,2"
3174 [(set_attr "type" "vecperm")])
3175
3176 (define_insn "xxswapd_<mode>"
3177 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3178 (vec_select:VSX_W
3179 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3180 (parallel [(const_int 2) (const_int 3)
3181 (const_int 0) (const_int 1)])))]
3182 "TARGET_VSX"
3183 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3184 ;; mnemonic xxpermdi instead.
3185 "xxpermdi %x0,%x1,%x1,2"
3186 [(set_attr "type" "vecperm")])
3187
3188 (define_insn "xxswapd_<mode>"
3189 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3190 (vec_select:VSX_D
3191 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3192 (parallel [(const_int 1) (const_int 0)])))]
3193 "TARGET_VSX"
3194 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3195 ;; mnemonic xxpermdi instead.
3196 "xxpermdi %x0,%x1,%x1,2"
3197 [(set_attr "type" "vecperm")])
3198
3199 ;; Swap upper/lower 64-bit values in a 128-bit vector
3200 (define_insn "xxswapd_v1ti"
3201 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
3202 (subreg:V1TI
3203 (vec_select:V2DI
3204 (subreg:V2DI
3205 (match_operand:V1TI 1 "vsx_register_operand" "v") 0 )
3206 (parallel [(const_int 1)(const_int 0)]))
3207 0))]
3208 "TARGET_POWER10"
3209 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3210 ;; mnemonic xxpermdi instead.
3211 "xxpermdi %x0,%x1,%x1,2"
3212 [(set_attr "type" "vecperm")])
3213
3214 (define_insn "xxgenpcvm_<mode>_internal"
3215 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3216 (unspec:VSX_EXTRACT_I4
3217 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3218 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3219 UNSPEC_XXGENPCV))]
3220 "TARGET_POWER10"
3221 "xxgenpcv<wd>m %x0,%1,%2"
3222 [(set_attr "type" "vecsimple")])
3223
3224 (define_expand "xxgenpcvm_<mode>"
3225 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3226 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3227 (use (match_operand:QI 2 "immediate_operand"))]
3228 "TARGET_POWER10"
3229 {
3230 if (!BYTES_BIG_ENDIAN)
3231 {
3232 /* gen_xxgenpcvm assumes big endian element order. If little
3233 endian, swap the upper and lower double words first. */
3234 rtx tmp = gen_reg_rtx (<MODE>mode);
3235
3236 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3237 operands[1] = tmp;
3238 }
3239 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3240 operands[2]));
3241 DONE;
3242 })
3243
3244 ;; lxvd2x for little endian loads. We need several of
3245 ;; these since the form of the PARALLEL differs by mode.
3246 (define_insn "*vsx_lxvd2x2_le_<mode>"
3247 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3248 (vec_select:VSX_D
3249 (match_operand:VSX_D 1 "memory_operand" "Z")
3250 (parallel [(const_int 1) (const_int 0)])))]
3251 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3252 "lxvd2x %x0,%y1"
3253 [(set_attr "type" "vecload")])
3254
3255 (define_insn "*vsx_lxvd2x4_le_<mode>"
3256 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3257 (vec_select:VSX_W
3258 (match_operand:VSX_W 1 "memory_operand" "Z")
3259 (parallel [(const_int 2) (const_int 3)
3260 (const_int 0) (const_int 1)])))]
3261 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3262 "lxvd2x %x0,%y1"
3263 [(set_attr "type" "vecload")])
3264
3265 (define_insn "*vsx_lxvd2x8_le_V8HI"
3266 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3267 (vec_select:V8HI
3268 (match_operand:V8HI 1 "memory_operand" "Z")
3269 (parallel [(const_int 4) (const_int 5)
3270 (const_int 6) (const_int 7)
3271 (const_int 0) (const_int 1)
3272 (const_int 2) (const_int 3)])))]
3273 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3274 "lxvd2x %x0,%y1"
3275 [(set_attr "type" "vecload")])
3276
3277 (define_insn "*vsx_lxvd2x16_le_V16QI"
3278 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3279 (vec_select:V16QI
3280 (match_operand:V16QI 1 "memory_operand" "Z")
3281 (parallel [(const_int 8) (const_int 9)
3282 (const_int 10) (const_int 11)
3283 (const_int 12) (const_int 13)
3284 (const_int 14) (const_int 15)
3285 (const_int 0) (const_int 1)
3286 (const_int 2) (const_int 3)
3287 (const_int 4) (const_int 5)
3288 (const_int 6) (const_int 7)])))]
3289 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3290 "lxvd2x %x0,%y1"
3291 [(set_attr "type" "vecload")])
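
;; These patterns model the doubleword swap that lxvd2x performs on a
;; little endian memory image as an explicit vec_select, so the rest of
;; the compiler sees a register in GCC element order. They are gated on
;; !TARGET_P9_VECTOR because the ISA 3.0 lxvx/stxvx instructions load and
;; store in element order directly, leaving no swap to model.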
3292
3293 ;; stxvd2x for little endian stores. We need several of
3294 ;; these since the form of the PARALLEL differs by mode.
3295 (define_insn "*vsx_stxvd2x2_le_<mode>"
3296 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3297 (vec_select:VSX_D
3298 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3299 (parallel [(const_int 1) (const_int 0)])))]
3300 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3301 "stxvd2x %x1,%y0"
3302 [(set_attr "type" "vecstore")])
3303
3304 (define_insn "*vsx_stxvd2x4_le_<mode>"
3305 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3306 (vec_select:VSX_W
3307 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3308 (parallel [(const_int 2) (const_int 3)
3309 (const_int 0) (const_int 1)])))]
3310 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3311 "stxvd2x %x1,%y0"
3312 [(set_attr "type" "vecstore")])
3313
3314 (define_insn "*vsx_stxvd2x8_le_V8HI"
3315 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3316 (vec_select:V8HI
3317 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3318 (parallel [(const_int 4) (const_int 5)
3319 (const_int 6) (const_int 7)
3320 (const_int 0) (const_int 1)
3321 (const_int 2) (const_int 3)])))]
3322 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3323 "stxvd2x %x1,%y0"
3324 [(set_attr "type" "vecstore")])
3325
3326 (define_insn "*vsx_stxvd2x16_le_V16QI"
3327 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3328 (vec_select:V16QI
3329 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3330 (parallel [(const_int 8) (const_int 9)
3331 (const_int 10) (const_int 11)
3332 (const_int 12) (const_int 13)
3333 (const_int 14) (const_int 15)
3334 (const_int 0) (const_int 1)
3335 (const_int 2) (const_int 3)
3336 (const_int 4) (const_int 5)
3337 (const_int 6) (const_int 7)])))]
3338 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3339 "stxvd2x %x1,%y0"
3340 [(set_attr "type" "vecstore")])
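;; A note on the LE load/store patterns above: lxvd2x and stxvd2x transfer
;; the two doublewords in big endian order regardless of the processor's
;; endian mode, which is why each pattern wraps the access in a
;; doubleword-reversing vec_select.  A hedged C model of the V2DI load
;; (illustrative only):
;;
;;   void le_lxvd2x_v2di (unsigned long long reg[2],
;;                        const unsigned long long mem[2])
;;   {
;;     /* In LE element numbering the two halves arrive swapped; the
;;        PARALLEL [1 0] in the pattern describes exactly this.  */
;;     reg[0] = mem[1];
;;     reg[1] = mem[0];
;;   }
;;
;; The narrower modes follow the same scheme: the PARALLEL exchanges the two
;; doubleword halves while preserving element order within each half.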
3341
3342 ;; Convert a TImode value into V1TImode
3343 (define_expand "vsx_set_v1ti"
3344 [(match_operand:V1TI 0 "nonimmediate_operand")
3345 (match_operand:V1TI 1 "nonimmediate_operand")
3346 (match_operand:TI 2 "input_operand")
3347 (match_operand:QI 3 "u5bit_cint_operand")]
3348 "VECTOR_MEM_VSX_P (V1TImode)"
3349 {
3350 if (operands[3] != const0_rtx)
3351 gcc_unreachable ();
3352
3353 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3354 DONE;
3355 })
3356
3357 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3358 (define_expand "vsx_set_<mode>"
3359 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3360 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3361 (use (match_operand:<VEC_base> 2 "gpc_reg_operand"))
3362 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3363 "VECTOR_MEM_VSX_P (<MODE>mode)"
3364 {
3365 rtx dest = operands[0];
3366 rtx vec_reg = operands[1];
3367 rtx value = operands[2];
3368 rtx ele = operands[3];
3369 rtx tmp = gen_reg_rtx (<VEC_base>mode);
3370
3371 if (ele == const0_rtx)
3372 {
3373 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3374 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3375 DONE;
3376 }
3377 else if (ele == const1_rtx)
3378 {
3379 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3380 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3381 DONE;
3382 }
3383 else
3384 gcc_unreachable ();
3385 })
3386
3387 ;; Extract a DF/DI element from V2DF/V2DI.
3388 ;; Optimize cases where we can do a simple or direct move,
3389 ;; or see if we can avoid doing the move at all.
3390
3391 (define_expand "vsx_extract_<mode>"
3392 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
3393 (vec_select:<VEC_base>
3394 (match_operand:VSX_D 1 "gpc_reg_operand")
3395 (parallel
3396 [(match_operand:QI 2 "const_0_to_1_operand")])))]
3397 "VECTOR_MEM_VSX_P (<MODE>mode)"
3398 "")
3399
3400 (define_insn "*vsx_extract_<mode>_0"
3401 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wa,wr")
3402 (vec_select:<VEC_base>
3403 (match_operand:VSX_D 1 "gpc_reg_operand" "0,wa,wa")
3404 (parallel
3405 [(match_operand:QI 2 "const_0_to_1_operand" "n,n,n")])))]
3406 "VECTOR_MEM_VSX_P (<MODE>mode)
3407 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
3408 {
3409 if (which_alternative == 0)
3410 return ASM_COMMENT_START " vec_extract to same register";
3411
3412 if (which_alternative == 2)
3413 return "mfvsrd %0,%x1";
3414
3415 return "xxlor %x0,%x1,%x1";
3416 }
3417 [(set_attr "type" "*,veclogical,mfvsr")
3418 (set_attr "isa" "*,*,p8v")
3419 (set_attr "length" "0,*,*")])
3420
3421 (define_insn "*vsx_extract_<mode>_1"
3422 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wr")
3423 (vec_select:<VEC_base>
3424 (match_operand:VSX_D 1 "gpc_reg_operand" "wa,wa")
3425 (parallel
3426 [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))]
3427 "VECTOR_MEM_VSX_P (<MODE>mode)
3428 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 0)"
3429 {
3430 if (which_alternative == 1)
3431 return "mfvsrld %0,%x1";
3432
3433 operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 2 : 3);
3434 return "xxpermdi %x0,%x1,%x1,%3";
3435 }
3436 [(set_attr "type" "mfvsr,vecperm")
3437 (set_attr "isa" "*,p9v")])
3438
3439 ;; Optimize extracting a single scalar element from memory.
3440 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3441 [(set (match_operand:<VSX_D:VEC_base> 0 "register_operand" "=wa,wr")
3442 (vec_select:<VSX_D:VEC_base>
3443 (match_operand:VSX_D 1 "memory_operand" "m,m")
3444 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3445 (clobber (match_scratch:P 3 "=&b,&b"))]
3446 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3447 "#"
3448 "&& reload_completed"
3449 [(set (match_dup 0) (match_dup 4))]
3450 {
3451 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3452 operands[3], <VSX_D:VEC_base>mode);
3453 }
3454 [(set_attr "type" "fpload,load")
3455 (set_attr "length" "8")])
3456
3457 ;; Optimize storing to memory a single scalar element that is already in
3458 ;; the right location.
3459 (define_insn "*vsx_extract_<mode>_store"
3460 [(set (match_operand:<VEC_base> 0 "memory_operand" "=m,Z,wY")
3461 (vec_select:<VEC_base>
3462 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3463 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "n,n,n")])))]
3464 "VECTOR_MEM_VSX_P (<MODE>mode)
3465 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
3466 "@
3467 stfd%U0%X0 %1,%0
3468 stxsdx %x1,%y0
3469 stxsd %1,%0"
3470 [(set_attr "type" "fpstore")
3471 (set_attr "isa" "*,p7v,p9v")])
3472
3473 ;; Variable V2DI/V2DF extract shift
3474 (define_insn "vsx_vslo_<mode>"
3475 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
3476 (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3477 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3478 UNSPEC_VSX_VSLO))]
3479 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3480 "vslo %0,%1,%2"
3481 [(set_attr "type" "vecperm")])
3482
3483 ;; Variable V2DI/V2DF extract from a register
3484 (define_insn_and_split "vsx_extract_<mode>_var"
3485 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
3486 (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3487 (match_operand:DI 2 "gpc_reg_operand" "r")]
3488 UNSPEC_VSX_EXTRACT))
3489 (clobber (match_scratch:DI 3 "=r"))
3490 (clobber (match_scratch:V2DI 4 "=&v"))]
3491 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3492 "#"
3493 "&& reload_completed"
3494 [(const_int 0)]
3495 {
3496 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3497 operands[3], operands[4]);
3498 DONE;
3499 })
3500
3501 ;; Variable V2DI/V2DF extract from memory
3502 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3503 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,r")
3504 (unspec:<VEC_base> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3505 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3506 UNSPEC_VSX_EXTRACT))
3507 (clobber (match_scratch:DI 3 "=&b,&b"))]
3508 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3509 "#"
3510 "&& reload_completed"
3511 [(set (match_dup 0) (match_dup 4))]
3512 {
3513 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3514 operands[3], <VEC_base>mode);
3515 }
3516 [(set_attr "type" "fpload,load")])
3517
3518 ;; Extract a SF element from V4SF
3519 (define_insn_and_split "vsx_extract_v4sf"
3520 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3521 (vec_select:SF
3522 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3523 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3524 (clobber (match_scratch:V4SF 3 "=0"))]
3525 "VECTOR_UNIT_VSX_P (V4SFmode)"
3526 "#"
3527 "&& 1"
3528 [(const_int 0)]
3529 {
3530 rtx op0 = operands[0];
3531 rtx op1 = operands[1];
3532 rtx op2 = operands[2];
3533 rtx op3 = operands[3];
3534 rtx tmp;
3535 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3536
3537 if (ele == 0)
3538 tmp = op1;
3539 else
3540 {
3541 if (GET_CODE (op3) == SCRATCH)
3542 op3 = gen_reg_rtx (V4SFmode);
3543 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3544 tmp = op3;
3545 }
3546 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3547 DONE;
3548 }
3549 [(set_attr "length" "8")
3550 (set_attr "type" "fp")])
3551
3552 (define_insn_and_split "*vsx_extract_v4sf_load"
3553 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3554 (vec_select:SF
3555 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3556 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3557 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3558 "VECTOR_MEM_VSX_P (V4SFmode)"
3559 "#"
3560 "&& reload_completed"
3561 [(set (match_dup 0) (match_dup 4))]
3562 {
3563 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3564 operands[3], SFmode);
3565 }
3566 [(set_attr "type" "fpload,fpload,fpload,load")
3567 (set_attr "length" "8")
3568 (set_attr "isa" "*,p7v,p9v,*")])
3569
3570 ;; Variable V4SF extract from a register
3571 (define_insn_and_split "vsx_extract_v4sf_var"
3572 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3573 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3574 (match_operand:DI 2 "gpc_reg_operand" "r")]
3575 UNSPEC_VSX_EXTRACT))
3576 (clobber (match_scratch:DI 3 "=r"))
3577 (clobber (match_scratch:V2DI 4 "=&v"))]
3578 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3579 "#"
3580 "&& reload_completed"
3581 [(const_int 0)]
3582 {
3583 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3584 operands[3], operands[4]);
3585 DONE;
3586 })
3587
3588 ;; Variable V4SF extract from memory
3589 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3590 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3591 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3592 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3593 UNSPEC_VSX_EXTRACT))
3594 (clobber (match_scratch:DI 3 "=&b,&b"))]
3595 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3596 "#"
3597 "&& reload_completed"
3598 [(set (match_dup 0) (match_dup 4))]
3599 {
3600 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3601 operands[3], SFmode);
3602 }
3603 [(set_attr "type" "fpload,load")])
3604
3605 ;; Expand the builtin form of xxpermdi to canonical rtl.
3606 (define_expand "vsx_xxpermdi_<mode>"
3607 [(match_operand:VSX_L 0 "vsx_register_operand")
3608 (match_operand:VSX_L 1 "vsx_register_operand")
3609 (match_operand:VSX_L 2 "vsx_register_operand")
3610 (match_operand:QI 3 "u5bit_cint_operand")]
3611 "VECTOR_MEM_VSX_P (<MODE>mode)"
3612 {
3613 rtx target = operands[0];
3614 rtx op0 = operands[1];
3615 rtx op1 = operands[2];
3616 int mask = INTVAL (operands[3]);
3617 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3618 rtx perm1 = GEN_INT ((mask & 1) + 2);
3619 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3620
3621 if (<MODE>mode == V2DFmode)
3622 gen = gen_vsx_xxpermdi2_v2df_1;
3623 else
3624 {
3625 gen = gen_vsx_xxpermdi2_v2di_1;
3626 if (<MODE>mode != V2DImode)
3627 {
3628 target = gen_lowpart (V2DImode, target);
3629 op0 = gen_lowpart (V2DImode, op0);
3630 op1 = gen_lowpart (V2DImode, op1);
3631 }
3632 }
3633 emit_insn (gen (target, op0, op1, perm0, perm1));
3634 DONE;
3635 })
3636
3637 ;; Special version of xxpermdi that retains big-endian semantics.
3638 (define_expand "vsx_xxpermdi_<mode>_be"
3639 [(match_operand:VSX_L 0 "vsx_register_operand")
3640 (match_operand:VSX_L 1 "vsx_register_operand")
3641 (match_operand:VSX_L 2 "vsx_register_operand")
3642 (match_operand:QI 3 "u5bit_cint_operand")]
3643 "VECTOR_MEM_VSX_P (<MODE>mode)"
3644 {
3645 rtx target = operands[0];
3646 rtx op0 = operands[1];
3647 rtx op1 = operands[2];
3648 int mask = INTVAL (operands[3]);
3649 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3650 rtx perm1 = GEN_INT ((mask & 1) + 2);
3651 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3652
3653 if (<MODE>mode == V2DFmode)
3654 gen = gen_vsx_xxpermdi2_v2df_1;
3655 else
3656 {
3657 gen = gen_vsx_xxpermdi2_v2di_1;
3658 if (<MODE>mode != V2DImode)
3659 {
3660 target = gen_lowpart (V2DImode, target);
3661 op0 = gen_lowpart (V2DImode, op0);
3662 op1 = gen_lowpart (V2DImode, op1);
3663 }
3664 }
3665 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3666 transformation we don't want; it is necessary for
3667 rs6000_expand_vec_perm_const_1 but not for this use. So we
3668 prepare for that by reversing the transformation here. */
3669 if (BYTES_BIG_ENDIAN)
3670 emit_insn (gen (target, op0, op1, perm0, perm1));
3671 else
3672 {
3673 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3674 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3675 emit_insn (gen (target, op1, op0, p0, p1));
3676 }
3677 DONE;
3678 })
3679
3680 (define_insn "vsx_xxpermdi2_<mode>_1"
3681 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3682 (vec_select:VSX_D
3683 (vec_concat:<VS_double>
3684 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3685 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3686 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3687 (match_operand 4 "const_2_to_3_operand" "")])))]
3688 "VECTOR_MEM_VSX_P (<MODE>mode)"
3689 {
3690 int op3, op4, mask;
3691
3692 /* For little endian, swap operands and invert/swap selectors
3693 to get the correct xxpermdi. The operand swap sets up the
3694 inputs as a little endian array. The selectors are swapped
3695 because they are defined to use big endian ordering. The
3696 selectors are inverted to get the correct doublewords for
3697 little endian ordering. */
3698 if (BYTES_BIG_ENDIAN)
3699 {
3700 op3 = INTVAL (operands[3]);
3701 op4 = INTVAL (operands[4]);
3702 }
3703 else
3704 {
3705 op3 = 3 - INTVAL (operands[4]);
3706 op4 = 3 - INTVAL (operands[3]);
3707 }
3708
3709 mask = (op3 << 1) | (op4 - 2);
3710 operands[3] = GEN_INT (mask);
3711
3712 if (BYTES_BIG_ENDIAN)
3713 return "xxpermdi %x0,%x1,%x2,%3";
3714 else
3715 return "xxpermdi %x0,%x2,%x1,%3";
3716 }
3717 [(set_attr "type" "vecperm")])
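;; A minimal host-side sketch (illustrative C, not GCC source) of the mask
;; computation in vsx_xxpermdi2_<mode>_1 above, with sel0 in {0,1} and sel1
;; in {2,3} standing in for operands[3] and operands[4]:
;;
;;   int xxpermdi_mask (int sel0, int sel1, int big_endian)
;;   {
;;     if (!big_endian)
;;       {
;;         /* Swap and invert the selectors, as the insn body does; the
;;            asm output also swaps the two input registers.  */
;;         int t = 3 - sel1;
;;         sel1 = 3 - sel0;
;;         sel0 = t;
;;       }
;;     return (sel0 << 1) | (sel1 - 2);  /* 2-bit xxpermdi DM field */
;;   }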
3718
3719 ;; Extraction of a single element from a small integer vector.  Until ISA 3.0,
3720 ;; none of the small types were allowed in a vector register, so we had to
3721 ;; extract to DImode and either do a direct move or store.
3722 (define_expand "vsx_extract_<mode>"
3723 [(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
3724 (vec_select:<VEC_base>
3725 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3726 (parallel [(match_operand:QI 2 "const_int_operand")])))
3727 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3728 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3729 {
3730 /* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3731 if (TARGET_P9_VECTOR)
3732 {
3733 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3734 operands[2]));
3735 DONE;
3736 }
3737 })
3738
3739 (define_insn "vsx_extract_<mode>_p9"
3740 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3741 (vec_select:<VEC_base>
3742 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3743 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3744 (clobber (match_scratch:SI 3 "=r,X"))]
3745 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3746 {
3747 if (which_alternative == 0)
3748 return "#";
3749
3750 else
3751 {
3752 HOST_WIDE_INT elt = INTVAL (operands[2]);
3753 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3754 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3755 : elt);
3756
3757 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3758 HOST_WIDE_INT offset = unit_size * elt_adj;
3759
3760 operands[2] = GEN_INT (offset);
3761 if (unit_size == 4)
3762 return "xxextractuw %x0,%x1,%2";
3763 else
3764 return "vextractu<wd> %0,%1,%2";
3765 }
3766 }
3767 [(set_attr "type" "vecsimple")
3768 (set_attr "isa" "p9v,*")])
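;; A hedged sketch of the byte-offset computation above (illustrative C
;; only; the function name is made up):
;;
;;   int vextractu_byte_offset (int elt, int nunits, int unit_size,
;;                              int big_endian)
;;   {
;;     int elt_adj = big_endian ? elt : nunits - 1 - elt;
;;     return unit_size * elt_adj;  /* immediate for the extract insn */
;;   }
;;
;; For example, extracting element 2 of a V8HI on LE uses byte offset
;; 2 * (8 - 1 - 2) = 10.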
3769
3770 (define_split
3771 [(set (match_operand:<VEC_base> 0 "int_reg_operand")
3772 (vec_select:<VEC_base>
3773 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3774 (parallel [(match_operand:QI 2 "const_int_operand")])))
3775 (clobber (match_operand:SI 3 "int_reg_operand"))]
3776 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3777 [(const_int 0)]
3778 {
3779 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3780 rtx op1 = operands[1];
3781 rtx op2 = operands[2];
3782 rtx op3 = operands[3];
3783 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3784
3785 emit_move_insn (op3, GEN_INT (offset));
3786 if (BYTES_BIG_ENDIAN)
3787 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3788 else
3789 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3790 DONE;
3791 })
3792
3793 ;; Optimize zero extracts to eliminate the AND after the extract.
3794 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3795 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3796 (zero_extend:DI
3797 (vec_select:<VEC_base>
3798 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3799 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3800 (clobber (match_scratch:SI 3 "=r,X"))]
3801 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3802 "#"
3803 "&& reload_completed"
3804 [(parallel [(set (match_dup 4)
3805 (vec_select:<VEC_base>
3806 (match_dup 1)
3807 (parallel [(match_dup 2)])))
3808 (clobber (match_dup 3))])]
3809 {
3810 operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0]));
3811 }
3812 [(set_attr "isa" "p9v,*")])
3813
3814 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3815 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3816 [(set (match_operand:<VEC_base> 0 "memory_operand" "=Z,m")
3817 (vec_select:<VEC_base>
3818 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3819 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3820 (clobber (match_scratch:<VEC_base> 3 "=<VSX_EX>,&*r"))
3821 (clobber (match_scratch:SI 4 "=X,&r"))]
3822 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3823 "#"
3824 "&& reload_completed"
3825 [(parallel [(set (match_dup 3)
3826 (vec_select:<VEC_base>
3827 (match_dup 1)
3828 (parallel [(match_dup 2)])))
3829 (clobber (match_dup 4))])
3830 (set (match_dup 0)
3831 (match_dup 3))])
3832
3833 (define_insn_and_split "*vsx_extract_si"
3834 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3835 (vec_select:SI
3836 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3837 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3838 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3839 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3840 "#"
3841 "&& reload_completed"
3842 [(const_int 0)]
3843 {
3844 rtx dest = operands[0];
3845 rtx src = operands[1];
3846 rtx element = operands[2];
3847 rtx vec_tmp = operands[3];
3848 int value;
3849
3850 /* Adjust the index for LE element ordering; the minuend 3 below is
3851 GET_MODE_NUNITS (V4SImode) - 1.  */
3852 if (!BYTES_BIG_ENDIAN)
3853 element = GEN_INT (3 - INTVAL (element));
3854
3855 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3856 instruction. */
3857 value = INTVAL (element);
3858 if (value != 1)
3859 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3860 else
3861 vec_tmp = src;
3862
3863 if (MEM_P (operands[0]))
3864 {
3865 if (can_create_pseudo_p ())
3866 dest = rs6000_force_indexed_or_indirect_mem (dest);
3867
3868 if (TARGET_P8_VECTOR)
3869 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3870 else
3871 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3872 }
3873
3874 else if (TARGET_P8_VECTOR)
3875 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3876 else
3877 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3878 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3879
3880 DONE;
3881 }
3882 [(set_attr "type" "mfvsr,vecperm,fpstore")
3883 (set_attr "length" "8")
3884 (set_attr "isa" "*,p8v,*")])
3885
3886 (define_insn_and_split "*vsx_extract_<mode>_p8"
3887 [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
3888 (vec_select:<VEC_base>
3889 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3890 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3891 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3892 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3893 && !TARGET_P9_VECTOR"
3894 "#"
3895 "&& reload_completed"
3896 [(const_int 0)]
3897 {
3898 rtx dest = operands[0];
3899 rtx src = operands[1];
3900 rtx element = operands[2];
3901 rtx vec_tmp = operands[3];
3902 int value;
3903
3904 if (!BYTES_BIG_ENDIAN)
3905 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3906
3907 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3908 instruction. */
3909 value = INTVAL (element);
3910 if (<MODE>mode == V16QImode)
3911 {
3912 if (value != 7)
3913 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3914 else
3915 vec_tmp = src;
3916 }
3917 else if (<MODE>mode == V8HImode)
3918 {
3919 if (value != 3)
3920 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3921 else
3922 vec_tmp = src;
3923 }
3924 else
3925 gcc_unreachable ();
3926
3927 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3928 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3929 DONE;
3930 }
3931 [(set_attr "type" "mfvsr")])
3932
3933 ;; Optimize extracting a single scalar element from memory.
3934 (define_insn_and_split "*vsx_extract_<mode>_load"
3935 [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
3936 (vec_select:<VEC_base>
3937 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3938 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3939 (clobber (match_scratch:DI 3 "=&b"))]
3940 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3941 "#"
3942 "&& reload_completed"
3943 [(set (match_dup 0) (match_dup 4))]
3944 {
3945 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3946 operands[3], <VEC_base>mode);
3947 }
3948 [(set_attr "type" "load")
3949 (set_attr "length" "8")])
3950
3951 ;; Variable V16QI/V8HI/V4SI extract from a register
3952 (define_insn_and_split "vsx_extract_<mode>_var"
3953 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,r")
3954 (unspec:<VEC_base>
3955 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3956 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3957 UNSPEC_VSX_EXTRACT))
3958 (clobber (match_scratch:DI 3 "=r,r"))
3959 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3960 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3961 "#"
3962 "&& reload_completed"
3963 [(const_int 0)]
3964 {
3965 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3966 operands[3], operands[4]);
3967 DONE;
3968 }
3969 [(set_attr "isa" "p9v,*")])
3970
3971 ;; Variable V16QI/V8HI/V4SI extract from memory
3972 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3973 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r")
3974 (unspec:<VEC_base>
3975 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3976 (match_operand:DI 2 "gpc_reg_operand" "r")]
3977 UNSPEC_VSX_EXTRACT))
3978 (clobber (match_scratch:DI 3 "=&b"))]
3979 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3980 "#"
3981 "&& reload_completed"
3982 [(set (match_dup 0) (match_dup 4))]
3983 {
3984 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3985 operands[3], <VEC_base>mode);
3986 }
3987 [(set_attr "type" "load")])
3988
3989 ;; ISA 3.1 extract
3990 (define_expand "vextractl<mode>"
3991 [(set (match_operand:V2DI 0 "altivec_register_operand")
3992 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3993 (match_operand:VI2 2 "altivec_register_operand")
3994 (match_operand:SI 3 "register_operand")]
3995 UNSPEC_EXTRACTL))]
3996 "TARGET_POWER10"
3997 {
3998 if (BYTES_BIG_ENDIAN)
3999 {
4000 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
4001 operands[2], operands[3]));
4002 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4003 }
4004 else
4005 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
4006 operands[1], operands[3]));
4007 DONE;
4008 })
4009
4010 (define_insn "vextractl<mode>_internal"
4011 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4012 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4013 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4014 (match_operand:SI 3 "register_operand" "r")]
4015 UNSPEC_EXTRACTL))]
4016 "TARGET_POWER10"
4017 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
4018 [(set_attr "type" "vecsimple")])
4019
4020 (define_expand "vextractr<mode>"
4021 [(set (match_operand:V2DI 0 "altivec_register_operand")
4022 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
4023 (match_operand:VI2 2 "altivec_register_operand")
4024 (match_operand:SI 3 "register_operand")]
4025 UNSPEC_EXTRACTR))]
4026 "TARGET_POWER10"
4027 {
4028 if (BYTES_BIG_ENDIAN)
4029 {
4030 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
4031 operands[2], operands[3]));
4032 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4033 }
4034 else
4035 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
4036 operands[1], operands[3]));
4037 DONE;
4038 })
4039
4040 (define_insn "vextractr<mode>_internal"
4041 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4042 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4043 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4044 (match_operand:SI 3 "register_operand" "r")]
4045 UNSPEC_EXTRACTR))]
4046 "TARGET_POWER10"
4047 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
4048 [(set_attr "type" "vecsimple")])
4049
4050 (define_expand "vinsertvl_<mode>"
4051 [(set (match_operand:VI2 0 "altivec_register_operand")
4052 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4053 (match_operand:VI2 2 "altivec_register_operand")
4054 (match_operand:SI 3 "register_operand" "r")]
4055 UNSPEC_INSERTL))]
4056 "TARGET_POWER10"
4057 {
4058 if (BYTES_BIG_ENDIAN)
4059 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4060 operands[1], operands[2]));
4061 else
4062 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4063 operands[1], operands[2]));
4064 DONE;
4065 })
4066
4067 (define_insn "vinsertvl_internal_<mode>"
4068 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4069 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4070 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4071 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4072 UNSPEC_INSERTL))]
4073 "TARGET_POWER10"
4074 "vins<wd>vlx %0,%1,%2"
4075 [(set_attr "type" "vecsimple")])
4076
4077 (define_expand "vinsertvr_<mode>"
4078 [(set (match_operand:VI2 0 "altivec_register_operand")
4079 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4080 (match_operand:VI2 2 "altivec_register_operand")
4081 (match_operand:SI 3 "register_operand" "r")]
4082 UNSPEC_INSERTR))]
4083 "TARGET_POWER10"
4084 {
4085 if (BYTES_BIG_ENDIAN)
4086 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4087 operands[1], operands[2]));
4088 else
4089 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4090 operands[1], operands[2]));
4091 DONE;
4092 })
4093
4094 (define_insn "vinsertvr_internal_<mode>"
4095 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4096 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4097 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4098 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4099 UNSPEC_INSERTR))]
4100 "TARGET_POWER10"
4101 "vins<wd>vrx %0,%1,%2"
4102 [(set_attr "type" "vecsimple")])
4103
4104 (define_expand "vinsertgl_<mode>"
4105 [(set (match_operand:VI2 0 "altivec_register_operand")
4106 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4107 (match_operand:VI2 2 "altivec_register_operand")
4108 (match_operand:SI 3 "register_operand")]
4109 UNSPEC_INSERTL))]
4110 "TARGET_POWER10"
4111 {
4112 if (BYTES_BIG_ENDIAN)
4113 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4114 operands[1], operands[2]));
4115 else
4116 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4117 operands[1], operands[2]));
4118 DONE;
4119 })
4120
4121 (define_insn "vinsertgl_internal_<mode>"
4122 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4123 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4124 (match_operand:SI 2 "register_operand" "r")
4125 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4126 UNSPEC_INSERTL))]
4127 "TARGET_POWER10"
4128 "vins<wd>lx %0,%1,%2"
4129 [(set_attr "type" "vecsimple")])
4130
4131 (define_expand "vinsertgr_<mode>"
4132 [(set (match_operand:VI2 0 "altivec_register_operand")
4133 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4134 (match_operand:VI2 2 "altivec_register_operand")
4135 (match_operand:SI 3 "register_operand")]
4136 UNSPEC_INSERTR))]
4137 "TARGET_POWER10"
4138 {
4139 if (BYTES_BIG_ENDIAN)
4140 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4141 operands[1], operands[2]));
4142 else
4143 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4144 operands[1], operands[2]));
4145 DONE;
4146 })
4147
4148 (define_insn "vinsertgr_internal_<mode>"
4149 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4150 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4151 (match_operand:SI 2 "register_operand" "r")
4152 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4153 UNSPEC_INSERTR))]
4154 "TARGET_POWER10"
4155 "vins<wd>rx %0,%1,%2"
4156 [(set_attr "type" "vecsimple")])
4157
4158 (define_expand "vreplace_elt_<mode>"
4159 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4160 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4161 (match_operand:<VEC_base> 2 "register_operand")
4162 (match_operand:QI 3 "const_0_to_3_operand")]
4163 UNSPEC_REPLACE_ELT))]
4164 "TARGET_POWER10"
4165 {
4166 int index;
4167 /* The immediate value is the element index; convert it to a byte index
4168 and adjust for endianness if needed.  */
4169 if (BYTES_BIG_ENDIAN)
4170 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4171
4172 else
4173 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4174
4175 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4176 operands[2],
4177 GEN_INT (index)));
4178 DONE;
4179 }
4180 [(set_attr "type" "vecsimple")])
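;; A hedged C model of the index conversion above (illustrative only;
;; <REPLACE_ELT_sh> and <REPLACE_ELT_max> come from iterator definitions
;; elsewhere in the port, where sh is log2 of the element size and max is
;; the largest valid byte index):
;;
;;   int vins_byte_index (int elt, int sh, int max, int big_endian)
;;   {
;;     int byte = elt << sh;
;;     return big_endian ? byte : max - byte;
;;   }
;;
;; For a word insert on LE, element 1 becomes byte index 12 - 4 = 8.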
4181
4182 (define_insn "vreplace_elt_<mode>_inst"
4183 [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
4184 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4185 (match_operand:<VEC_base> 2 "register_operand" "r")
4186 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4187 UNSPEC_REPLACE_ELT))]
4188 "TARGET_POWER10"
4189 "vins<REPLACE_ELT_char> %0,%2,%3"
4190 [(set_attr "type" "vecsimple")])
4191
4192 (define_insn "vreplace_un_<mode>"
4193 [(set (match_operand:V16QI 0 "register_operand" "=v")
4194 (unspec:V16QI [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4195 (match_operand:<VEC_base> 2 "register_operand" "r")
4196 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4197 UNSPEC_REPLACE_UN))]
4198 "TARGET_POWER10"
4199 "vins<REPLACE_ELT_char> %0,%2,%3"
4200 [(set_attr "type" "vecsimple")])
4201
4202 ;; VSX_EXTRACT optimizations
4203 ;; Optimize double d = (double) vec_extract (vi, <n>)
4204 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP.
4205 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4206 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4207 (any_float:DF
4208 (vec_select:SI
4209 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4210 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4211 (clobber (match_scratch:V4SI 3 "=v"))]
4212 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4213 "#"
4214 "&& 1"
4215 [(const_int 0)]
4216 {
4217 rtx dest = operands[0];
4218 rtx src = operands[1];
4219 rtx element = operands[2];
4220 rtx v4si_tmp = operands[3];
4221 int value;
4222
4223 /* Adjust the index for LE element ordering; the minuend 3 below is
4224 GET_MODE_NUNITS (V4SImode) - 1.  */
4225 if (!BYTES_BIG_ENDIAN)
4226 element = GEN_INT (3 - INTVAL (element));
4227
4228 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4229 instruction. */
4230 value = INTVAL (element);
4231 if (value != 0)
4232 {
4233 if (GET_CODE (v4si_tmp) == SCRATCH)
4234 v4si_tmp = gen_reg_rtx (V4SImode);
4235 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4236 }
4237 else
4238 v4si_tmp = src;
4239
4240 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4241 DONE;
4242 })
4243
4244 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4245 ;; where <type> is a floating point type other than double that is supported
4246 ;; by the hardware.  First convert the value to double, and then to the desired
4247 ;; type.
4248 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4249 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4250 (any_float:VSX_EXTRACT_FL
4251 (vec_select:SI
4252 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4253 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4254 (clobber (match_scratch:V4SI 3 "=v"))
4255 (clobber (match_scratch:DF 4 "=wa"))]
4256 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4257 "#"
4258 "&& 1"
4259 [(const_int 0)]
4260 {
4261 rtx dest = operands[0];
4262 rtx src = operands[1];
4263 rtx element = operands[2];
4264 rtx v4si_tmp = operands[3];
4265 rtx df_tmp = operands[4];
4266 int value;
4267
4268 /* Adjust the index for LE element ordering; the minuend 3 below is
4269 GET_MODE_NUNITS (V4SImode) - 1.  */
4270 if (!BYTES_BIG_ENDIAN)
4271 element = GEN_INT (3 - INTVAL (element));
4272
4273 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4274 instruction. */
4275 value = INTVAL (element);
4276 if (value != 0)
4277 {
4278 if (GET_CODE (v4si_tmp) == SCRATCH)
4279 v4si_tmp = gen_reg_rtx (V4SImode);
4280 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4281 }
4282 else
4283 v4si_tmp = src;
4284
4285 if (GET_CODE (df_tmp) == SCRATCH)
4286 df_tmp = gen_reg_rtx (DFmode);
4287
4288 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4289
4290 if (<MODE>mode == SFmode)
4291 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4292 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4293 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4294 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4295 && TARGET_FLOAT128_HW)
4296 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4297 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4298 emit_insn (gen_extenddfif2 (dest, df_tmp));
4299 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4300 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4301 else
4302 gcc_unreachable ();
4303
4304 DONE;
4305 })
4306
4307 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
4308 ;; where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4309 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4310 ;; vector short or vector unsigned short.
4311 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_fl_<FL_CONV:mode>"
4312 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4313 (float:FL_CONV
4314 (vec_select:<VSX_EXTRACT_I:VEC_base>
4315 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4316 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4317 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4318 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4319 && TARGET_P9_VECTOR"
4320 "#"
4321 "&& reload_completed"
4322 [(parallel [(set (match_dup 3)
4323 (vec_select:<VSX_EXTRACT_I:VEC_base>
4324 (match_dup 1)
4325 (parallel [(match_dup 2)])))
4326 (clobber (scratch:SI))])
4327 (set (match_dup 4)
4328 (sign_extend:DI (match_dup 3)))
4329 (set (match_dup 0)
4330 (float:<FL_CONV:MODE> (match_dup 4)))]
4331 {
4332 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4333 }
4334 [(set_attr "isa" "<FL_CONV:VSisa>")])
4335
4336 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_ufl_<FL_CONV:mode>"
4337 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4338 (unsigned_float:FL_CONV
4339 (vec_select:<VSX_EXTRACT_I:VEC_base>
4340 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4341 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4342 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4343 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4344 && TARGET_P9_VECTOR"
4345 "#"
4346 "&& reload_completed"
4347 [(parallel [(set (match_dup 3)
4348 (vec_select:<VSX_EXTRACT_I:VEC_base>
4349 (match_dup 1)
4350 (parallel [(match_dup 2)])))
4351 (clobber (scratch:SI))])
4352 (set (match_dup 0)
4353 (float:<FL_CONV:MODE> (match_dup 4)))]
4354 {
4355 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4356 }
4357 [(set_attr "isa" "<FL_CONV:VSisa>")])
4358
4359 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4360 (define_insn "vsx_set_<mode>_p9"
4361 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4362 (unspec:VSX_EXTRACT_I
4363 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4364 (match_operand:<VEC_base> 2 "gpc_reg_operand" "<VSX_EX>")
4365 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4366 UNSPEC_VSX_SET))]
4367 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4368 {
4369 int ele = INTVAL (operands[3]);
4370 int nunits = GET_MODE_NUNITS (<MODE>mode);
4371
4372 if (!BYTES_BIG_ENDIAN)
4373 ele = nunits - 1 - ele;
4374
4375 operands[3] = GEN_INT (GET_MODE_SIZE (<VEC_base>mode) * ele);
4376 if (<MODE>mode == V4SImode)
4377 return "xxinsertw %x0,%x2,%3";
4378 else
4379 return "vinsert<wd> %0,%2,%3";
4380 }
4381 [(set_attr "type" "vecperm")])
4382
4383 (define_insn_and_split "vsx_set_v4sf_p9"
4384 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4385 (unspec:V4SF
4386 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4387 (match_operand:SF 2 "gpc_reg_operand" "wa")
4388 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4389 UNSPEC_VSX_SET))
4390 (clobber (match_scratch:SI 4 "=&wa"))]
4391 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4392 "#"
4393 "&& reload_completed"
4394 [(set (match_dup 5)
4395 (unspec:V4SF [(match_dup 2)]
4396 UNSPEC_VSX_CVDPSPN))
4397 (parallel [(set (match_dup 4)
4398 (vec_select:SI (match_dup 6)
4399 (parallel [(match_dup 7)])))
4400 (clobber (scratch:SI))])
4401 (set (match_dup 8)
4402 (unspec:V4SI [(match_dup 8)
4403 (match_dup 4)
4404 (match_dup 3)]
4405 UNSPEC_VSX_SET))]
4406 {
4407 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4408
4409 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4410 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4411 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4412 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4413 }
4414 [(set_attr "type" "vecperm")
4415 (set_attr "length" "12")
4416 (set_attr "isa" "p9v")])
4417
4418 ;; Special case setting a V4SF element to 0.0f.
4419 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4420 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4421 (unspec:V4SF
4422 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4423 (match_operand:SF 2 "zero_fp_constant" "j")
4424 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4425 UNSPEC_VSX_SET))
4426 (clobber (match_scratch:SI 4 "=&wa"))]
4427 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4428 "#"
4429 "&& reload_completed"
4430 [(set (match_dup 4)
4431 (const_int 0))
4432 (set (match_dup 5)
4433 (unspec:V4SI [(match_dup 5)
4434 (match_dup 4)
4435 (match_dup 3)]
4436 UNSPEC_VSX_SET))]
4437 {
4438 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4439 }
4440 [(set_attr "type" "vecperm")
4441 (set_attr "length" "8")
4442 (set_attr "isa" "p9v")])
4443
4444 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4445 ;; that is in the default scalar position (1 for big endian, 2 for little
4446 ;; endian). We just need to do an xxinsertw since the element is in the
4447 ;; correct location.
4448
4449 (define_insn "*vsx_insert_extract_v4sf_p9"
4450 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4451 (unspec:V4SF
4452 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4453 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4454 (parallel
4455 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4456 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4457 UNSPEC_VSX_SET))]
4458 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4459 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4460 {
4461 int ele = INTVAL (operands[4]);
4462
4463 /* Adjust the index for LE element ordering; the minuend 3 below is
4464 GET_MODE_NUNITS (V4SFmode) - 1.  */
4465 if (!BYTES_BIG_ENDIAN)
4466 ele = 3 - ele;
4467
4468 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4469 return "xxinsertw %x0,%x2,%4";
4470 }
4471 [(set_attr "type" "vecperm")])
4472
4473 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4474 ;; that is in the default scalar position (1 for big endian, 2 for little
4475 ;; endian).  Do the insert/extract as integer operations to avoid the conversion.
4476
4477 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4478 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4479 (unspec:V4SF
4480 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4481 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4482 (parallel
4483 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4484 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4485 UNSPEC_VSX_SET))
4486 (clobber (match_scratch:SI 5 "=&wa"))]
4487 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4488 && TARGET_P9_VECTOR && TARGET_POWERPC64
4489 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4490 "#"
4491 "&& 1"
4492 [(parallel [(set (match_dup 5)
4493 (vec_select:SI (match_dup 6)
4494 (parallel [(match_dup 3)])))
4495 (clobber (scratch:SI))])
4496 (set (match_dup 7)
4497 (unspec:V4SI [(match_dup 8)
4498 (match_dup 5)
4499 (match_dup 4)]
4500 UNSPEC_VSX_SET))]
4501 {
4502 if (GET_CODE (operands[5]) == SCRATCH)
4503 operands[5] = gen_reg_rtx (SImode);
4504
4505 operands[6] = gen_lowpart (V4SImode, operands[2]);
4506 operands[7] = gen_lowpart (V4SImode, operands[0]);
4507 operands[8] = gen_lowpart (V4SImode, operands[1]);
4508 }
4509 [(set_attr "type" "vecperm")
4510 (set_attr "isa" "p9v")])
4511
4512 ;; Expanders for builtins
4513 (define_expand "vsx_mergel_<mode>"
4514 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4515 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4516 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4517 "VECTOR_MEM_VSX_P (<MODE>mode)"
4518 {
4519 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4520 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4521 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4522 emit_insn (gen_rtx_SET (operands[0], x));
4523 DONE;
4524 })
4525
4526 (define_expand "vsx_mergeh_<mode>"
4527 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4528 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4529 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4530 "VECTOR_MEM_VSX_P (<MODE>mode)"
4531 {
4532 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4533 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4534 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4535 emit_insn (gen_rtx_SET (operands[0], x));
4536 DONE;
4537 })
4538
4539 ;; V2DF/V2DI splat
4540 ;; We separate the register splat insn from the memory splat insn to force the
4541 ;; register allocator to generate the indexed form of the SPLAT when it is
4542 ;; given an offsettable memory reference. Otherwise, if the register and
4543 ;; memory insns were combined into a single insn, the register allocator would
4544 ;; load the value into a register and then do a doubleword permute.
4545 (define_expand "vsx_splat_<mode>"
4546 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4547 (vec_duplicate:VSX_D
4548 (match_operand:<VEC_base> 1 "input_operand")))]
4549 "VECTOR_MEM_VSX_P (<MODE>mode)"
4550 {
4551 rtx op1 = operands[1];
4552 if (MEM_P (op1))
4553 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4554 else if (!REG_P (op1))
4555 operands[1] = force_reg (<VSX_D:VEC_base>mode, op1);
4556 })
4557
4558 (define_insn "vsx_splat_<mode>_reg"
4559 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4560 (vec_duplicate:VSX_D
4561 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")))]
4562 "VECTOR_MEM_VSX_P (<MODE>mode)"
4563 "@
4564 xxpermdi %x0,%x1,%x1,0
4565 mtvsrdd %x0,%1,%1"
4566 [(set_attr "type" "vecperm,vecmove")])
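;; A brief note (hedged): mtvsrdd %x0,%1,%1 moves the same GPR into both
;; doublewords of the VSX register, and xxpermdi with a DM field of 0
;; duplicates doubleword 0, so either alternative realizes the
;; vec_duplicate.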
4567
4568 (define_insn "vsx_splat_<mode>_mem"
4569 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4570 (vec_duplicate:VSX_D
4571 (match_operand:<VSX_D:VEC_base> 1 "memory_operand" "Z")))]
4572 "VECTOR_MEM_VSX_P (<MODE>mode)"
4573 "lxvdsx %x0,%y1"
4574 [(set_attr "type" "vecload")])
4575
4576 ;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
4577 (define_insn "*vsx_splat_extract_<mode>"
4578 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4579 (vec_duplicate:VSX_D
4580 (vec_select:<VEC_base>
4581 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
4582 (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))]
4583 "VECTOR_MEM_VSX_P (<MODE>mode)"
4584 {
4585 int which_word = INTVAL (operands[2]);
4586 if (!BYTES_BIG_ENDIAN)
4587 which_word = 1 - which_word;
4588
4589 operands[3] = GEN_INT (which_word ? 3 : 0);
4590 return "xxpermdi %x0,%x1,%x1,%3";
4591 }
4592 [(set_attr "type" "vecperm")])
4593
4594 ;; V4SI splat support
4595 (define_insn "vsx_splat_v4si"
4596 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
4597 (vec_duplicate:V4SI
4598 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4599 "TARGET_P9_VECTOR"
4600 "@
4601 mtvsrws %x0,%1
4602 lxvwsx %x0,%y1"
4603 [(set_attr "type" "vecperm,vecload")])
4604
4605 ;; SImode is not currently allowed in vector registers.  This pattern
4606 ;; allows us to use direct move to get the value into a vector register
4607 ;; so that we can use XXSPLTW.
4608 (define_insn "vsx_splat_v4si_di"
4609 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4610 (vec_duplicate:V4SI
4611 (truncate:SI
4612 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4613 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4614 "@
4615 xxspltw %x0,%x1,1
4616 mtvsrws %x0,%1"
4617 [(set_attr "type" "vecperm")
4618 (set_attr "isa" "p8v,*")])
4619
4620 ;; V4SF splat (ISA 3.0)
4621 (define_insn_and_split "vsx_splat_v4sf"
4622 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4623 (vec_duplicate:V4SF
4624 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4625 "TARGET_P9_VECTOR"
4626 "@
4627 lxvwsx %x0,%y1
4628 #
4629 mtvsrws %x0,%1"
4630 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4631 [(set (match_dup 0)
4632 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4633 (set (match_dup 0)
4634 (unspec:V4SF [(match_dup 0)
4635 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4636 ""
4637 [(set_attr "type" "vecload,vecperm,vecperm")
4638 (set_attr "length" "*,8,*")
4639 (set_attr "isa" "*,p8v,*")])
4640
4641 ;; V4SF/V4SI splat from a vector element
4642 (define_insn "vsx_xxspltw_<mode>"
4643 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4644 (vec_duplicate:VSX_W
4645 (vec_select:<VEC_base>
4646 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4647 (parallel
4648 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4649 "VECTOR_MEM_VSX_P (<MODE>mode)"
4650 {
4651 if (!BYTES_BIG_ENDIAN)
4652 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4653
4654 return "xxspltw %x0,%x1,%2";
4655 }
4656 [(set_attr "type" "vecperm")])
4657
4658 (define_insn "vsx_xxspltw_<mode>_direct"
4659 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4660 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4661 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4662 UNSPEC_VSX_XXSPLTW))]
4663 "VECTOR_MEM_VSX_P (<MODE>mode)"
4664 "xxspltw %x0,%x1,%2"
4665 [(set_attr "type" "vecperm")])
4666
4667 ;; V16QI/V8HI splat support on ISA 2.07
4668 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4669 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4670 (vec_duplicate:VSX_SPLAT_I
4671 (truncate:<VEC_base>
4672 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4673 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4674 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4675 [(set_attr "type" "vecperm")])
4676
4677 ;; V2DF/V2DI splat for use by vec_splat builtin
4678 (define_insn "vsx_xxspltd_<mode>"
4679 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4680 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4681 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4682 UNSPEC_VSX_XXSPLTD))]
4683 "VECTOR_MEM_VSX_P (<MODE>mode)"
4684 {
4685 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4686 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4687 return "xxpermdi %x0,%x1,%x1,0";
4688 else
4689 return "xxpermdi %x0,%x1,%x1,3";
4690 }
4691 [(set_attr "type" "vecperm")])
4692
4693 ;; V4SF/V4SI interleave
4694 (define_expand "vsx_xxmrghw_<mode>"
4695 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4696 (vec_select:VSX_W
4697 (vec_concat:<VS_double>
4698 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4699 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4700 (parallel [(const_int 0) (const_int 4)
4701 (const_int 1) (const_int 5)])))]
4702 "VECTOR_MEM_VSX_P (<MODE>mode)"
4703 {
4704 rtx (*fun) (rtx, rtx, rtx);
4705 fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_<mode>
4706 : gen_altivec_vmrglw_direct_<mode>;
4707 if (!BYTES_BIG_ENDIAN)
4708 std::swap (operands[1], operands[2]);
4709 emit_insn (fun (operands[0], operands[1], operands[2]));
4710 DONE;
4711 }
4712 [(set_attr "type" "vecperm")])
4713
4714 (define_expand "vsx_xxmrglw_<mode>"
4715 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4716 (vec_select:VSX_W
4717 (vec_concat:<VS_double>
4718 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4719 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4720 (parallel [(const_int 2) (const_int 6)
4721 (const_int 3) (const_int 7)])))]
4722 "VECTOR_MEM_VSX_P (<MODE>mode)"
4723 {
4724 rtx (*fun) (rtx, rtx, rtx);
4725 fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_<mode>
4726 : gen_altivec_vmrghw_direct_<mode>;
4727 if (!BYTES_BIG_ENDIAN)
4728 std::swap (operands[1], operands[2]);
4729 emit_insn (fun (operands[0], operands[1], operands[2]));
4730 DONE;
4731 }
4732 [(set_attr "type" "vecperm")])
4733
4734 ;; Shift left double by word immediate
4735 (define_insn "vsx_xxsldwi_<mode>"
4736 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4737 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4738 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4739 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4740 UNSPEC_VSX_SLDWI))]
4741 "VECTOR_MEM_VSX_P (<MODE>mode)"
4742 "xxsldwi %x0,%x1,%x2,%3"
4743 [(set_attr "type" "vecperm")
4744 (set_attr "isa" "<VSisa>")])
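;; As a hedged illustration of xxsldwi: the instruction concatenates its two
;; inputs and extracts four contiguous words starting at word %3, counted in
;; big endian word order.  With A = {a0,a1,a2,a3} and B = {b0,b1,b2,b3},
;; xxsldwi A,B,2 yields {a2,a3,b0,b1}; a shift of 0 simply copies A.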
4745
4746 \f
4747 ;; Vector reduction insns and splitters
4748
4749 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4750 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4751 (VEC_reduc:V2DF
4752 (vec_concat:V2DF
4753 (vec_select:DF
4754 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4755 (parallel [(const_int 1)]))
4756 (vec_select:DF
4757 (match_dup 1)
4758 (parallel [(const_int 0)])))
4759 (match_dup 1)))
4760 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4761 "VECTOR_UNIT_VSX_P (V2DFmode)"
4762 "#"
4763 "&& 1"
4764 [(const_int 0)]
4765 {
4766 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4767 ? gen_reg_rtx (V2DFmode)
4768 : operands[2];
4769 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4770 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4771 DONE;
4772 }
4773 [(set_attr "length" "8")
4774 (set_attr "type" "veccomplex")])
4775
4776 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4777 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4778 (VEC_reduc:V4SF
4779 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4780 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4781 (clobber (match_scratch:V4SF 2 "=&wa"))
4782 (clobber (match_scratch:V4SF 3 "=&wa"))]
4783 "VECTOR_UNIT_VSX_P (V4SFmode)"
4784 "#"
4785 "&& 1"
4786 [(const_int 0)]
4787 {
4788 rtx op0 = operands[0];
4789 rtx op1 = operands[1];
4790 rtx tmp2, tmp3, tmp4;
4791
4792 if (can_create_pseudo_p ())
4793 {
4794 tmp2 = gen_reg_rtx (V4SFmode);
4795 tmp3 = gen_reg_rtx (V4SFmode);
4796 tmp4 = gen_reg_rtx (V4SFmode);
4797 }
4798 else
4799 {
4800 tmp2 = operands[2];
4801 tmp3 = operands[3];
4802 tmp4 = tmp2;
4803 }
4804
4805 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4806 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4807 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4808 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4809 DONE;
4810 }
4811 [(set_attr "length" "16")
4812 (set_attr "type" "veccomplex")])
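;; A hedged trace of the V4SF reduction above, written for the add case with
;; v = {v0,v1,v2,v3} (t2..t4 mirror tmp2..tmp4 in the split):
;;
;;   t2 = xxsldwi (v, v, 2)     ;; {v2, v3, v0, v1}
;;   t3 = t2 + v                ;; {v0+v2, v1+v3, v0+v2, v1+v3}
;;   t4 = xxsldwi (t3, t3, 3)   ;; rotate by one more word
;;   r  = t4 + t3               ;; every lane now holds v0+v1+v2+v3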
4813
4814 ;; Combiner patterns that pair with the vector reduction patterns, knowing
4815 ;; we can get to the top element of the V2DF array without doing an extract.
4816
4817 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4818 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4819 (vec_select:DF
4820 (VEC_reduc:V2DF
4821 (vec_concat:V2DF
4822 (vec_select:DF
4823 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4824 (parallel [(const_int 1)]))
4825 (vec_select:DF
4826 (match_dup 1)
4827 (parallel [(const_int 0)])))
4828 (match_dup 1))
4829 (parallel [(const_int 1)])))
4830 (clobber (match_scratch:DF 2 "=0,&wa"))]
4831 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4832 "#"
4833 "&& 1"
4834 [(const_int 0)]
4835 {
4836 rtx hi = gen_highpart (DFmode, operands[1]);
4837 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4838 ? gen_reg_rtx (DFmode)
4839 : operands[2];
4840
4841 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4842 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4843 DONE;
4844 }
4845 [(set_attr "length" "8")
4846 (set_attr "type" "veccomplex")])
4847
4848 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4849 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4850 (vec_select:SF
4851 (VEC_reduc:V4SF
4852 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4853 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4854 (parallel [(const_int 3)])))
4855 (clobber (match_scratch:V4SF 2 "=&wa"))
4856 (clobber (match_scratch:V4SF 3 "=&wa"))
4857 (clobber (match_scratch:V4SF 4 "=0"))]
4858 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4859 "#"
4860 "&& 1"
4861 [(const_int 0)]
4862 {
4863 rtx op0 = operands[0];
4864 rtx op1 = operands[1];
4865 rtx tmp2, tmp3, tmp4, tmp5;
4866
4867 if (can_create_pseudo_p ())
4868 {
4869 tmp2 = gen_reg_rtx (V4SFmode);
4870 tmp3 = gen_reg_rtx (V4SFmode);
4871 tmp4 = gen_reg_rtx (V4SFmode);
4872 tmp5 = gen_reg_rtx (V4SFmode);
4873 }
4874 else
4875 {
4876 tmp2 = operands[2];
4877 tmp3 = operands[3];
4878 tmp4 = tmp2;
4879 tmp5 = operands[4];
4880 }
4881
4882 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4883 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4884 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4885 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4886 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4887 DONE;
4888 }
4889 [(set_attr "length" "20")
4890 (set_attr "type" "veccomplex")])
4891
4892 \f
4893 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4894 (define_peephole
4895 [(set (match_operand:P 0 "base_reg_operand")
4896 (match_operand:P 1 "short_cint_operand"))
4897 (set (match_operand:VSX_M 2 "vsx_register_operand")
4898 (mem:VSX_M (plus:P (match_dup 0)
4899 (match_operand:P 3 "int_reg_operand"))))]
4900 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4901 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4902 [(set_attr "length" "8")
4903 (set_attr "type" "vecload")])
4904
4905 (define_peephole
4906 [(set (match_operand:P 0 "base_reg_operand")
4907 (match_operand:P 1 "short_cint_operand"))
4908 (set (match_operand:VSX_M 2 "vsx_register_operand")
4909 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4910 (match_dup 0))))]
4911 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4912 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4913 [(set_attr "length" "8")
4914 (set_attr "type" "vecload")])
4915
4916 \f
4917 ;; ISA 3.1 vector extend sign support
4918 (define_insn "vsx_sign_extend_v2di_v1ti"
4919 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
4920 (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")]
4921 UNSPEC_VSX_SIGN_EXTEND))]
4922 "TARGET_POWER10"
4923 "vextsd2q %0,%1"
4924 [(set_attr "type" "vecexts")])
4925
4926 ;; ISA 3.0 vector extend sign support
4927
4928 (define_insn "vsx_sign_extend_v16qi_<mode>"
4929 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4930 (unspec:VSINT_84
4931 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4932 UNSPEC_VSX_SIGN_EXTEND))]
4933 "TARGET_P9_VECTOR"
4934 "vextsb2<wd> %0,%1"
4935 [(set_attr "type" "vecexts")])
4936
4937 (define_insn "vsx_sign_extend_v8hi_<mode>"
4938 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4939 (unspec:VSINT_84
4940 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4941 UNSPEC_VSX_SIGN_EXTEND))]
4942 "TARGET_P9_VECTOR"
4943 "vextsh2<wd> %0,%1"
4944 [(set_attr "type" "vecexts")])
4945
4946 (define_insn "vsx_sign_extend_v4si_v2di"
4947 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4948 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4949 UNSPEC_VSX_SIGN_EXTEND))]
4950 "TARGET_P9_VECTOR"
4951 "vextsw2d %0,%1"
4952 [(set_attr "type" "vecexts")])
4953
4954 ;; Sign extend DI to TI.  We provide both GPR targets and Altivec targets on
4955 ;; power10.  On earlier systems, the machine independent code will sign extend
4956 ;; the 64-bit value to 128 bits using shift instructions.
4957 ;;
4958 ;; If the register allocator prefers to use GPR registers, copy the low double
4959 ;; word and sign extend it with an arithmetic shift right by 63 bits.
4960 ;;
4961 ;; If the register allocator prefers to use Altivec registers on power10,
4962 ;; generate the vextsd2q instruction.
4963 (define_insn_and_split "extendditi2"
4964 [(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v")
4965 (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b,wa,Z")))
4966 (clobber (reg:DI CA_REGNO))]
4967 "TARGET_POWERPC64 && TARGET_POWER10"
4968 "#"
4969 "&& reload_completed"
4970 [(pc)]
4971 {
4972 rtx dest = operands[0];
4973 rtx src = operands[1];
4974 int dest_regno = reg_or_subregno (dest);
4975
4976 /* Handle conversion to GPR registers. Load up the low part and then do
4977 a sign extension to the upper part. */
4978 if (INT_REGNO_P (dest_regno))
4979 {
4980 rtx dest_hi = gen_highpart (DImode, dest);
4981 rtx dest_lo = gen_lowpart (DImode, dest);
4982
4983 emit_move_insn (dest_lo, src);
4984 /* In case src is a MEM, we have to use the destination, which is a
4985 register, instead of re-using the source. */
4986 rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
4987 emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
4988 DONE;
4989 }
4990
4991   /* For conversion to an Altivec register, generate either a splat operation
4992      or a load rightmost double word instruction.  Both instructions get the
4993      DImode value into the lower 64 bits, and then we do the vextsd2q
4994      instruction.  */
4995
4996 else if (ALTIVEC_REGNO_P (dest_regno))
4997 {
4998 if (MEM_P (src))
4999 emit_insn (gen_vsx_lxvrdx (dest, src));
5000 else
5001 {
5002 rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
5003 emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
5004 }
5005
5006 emit_insn (gen_extendditi2_vector (dest, dest));
5007 DONE;
5008 }
5009
5010 else
5011 gcc_unreachable ();
5012 }
5013 [(set_attr "length" "8")
5014 (set_attr "type" "shift,load,vecmove,vecperm,load")])
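;; For illustration, an ordinary sign extension in C is enough to reach
;; extendditi2 on power10 (a sketch, assuming __int128 support):
;;
;;   __int128 sext (long long x)
;;   {
;;     return x;   /* GPRs: copy low + sradi 63; VSX: splat/lxvrdx + vextsd2q */
;;   }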
5015
5016 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
5017 (define_insn "extendditi2_vector"
5018 [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
5019 (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")]
5020 UNSPEC_EXTENDDITI2))]
5021 "TARGET_POWER10"
5022 "vextsd2q %0,%1"
5023 [(set_attr "type" "vecexts")])
5024
5025 \f
5026 ;; ISA 3.0 Binary Floating-Point Support
5027
5028 ;; VSX Scalar Extract Exponent Quad-Precision
5029 (define_insn "xsxexpqp_<mode>"
5030 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
5031 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
5032 UNSPEC_VSX_SXEXPDP))]
5033 "TARGET_P9_VECTOR"
5034 "xsxexpqp %0,%1"
5035 [(set_attr "type" "vecmove")])
5036
5037 ;; VSX Scalar Extract Exponent Double-Precision
5038 (define_insn "xsxexpdp"
5039 [(set (match_operand:DI 0 "register_operand" "=r")
5040 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
5041 UNSPEC_VSX_SXEXPDP))]
5042 "TARGET_P9_VECTOR && TARGET_64BIT"
5043 "xsxexpdp %0,%x1"
5044 [(set_attr "type" "integer")])
5045
5046 ;; VSX Scalar Extract Significand Quad-Precision
5047 (define_insn "xsxsigqp_<mode>"
5048 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
5049 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
5050 UNSPEC_VSX_SXSIG))]
5051 "TARGET_P9_VECTOR"
5052 "xsxsigqp %0,%1"
5053 [(set_attr "type" "vecmove")])
5054
5055 ;; VSX Scalar Extract Significand Double-Precision
5056 (define_insn "xsxsigdp"
5057 [(set (match_operand:DI 0 "register_operand" "=r")
5058 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
5059 UNSPEC_VSX_SXSIG))]
5060 "TARGET_P9_VECTOR && TARGET_64BIT"
5061 "xsxsigdp %0,%x1"
5062 [(set_attr "type" "integer")])
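;; For example, xsxexpdp/xsxsigdp back the documented ISA 3.0 built-ins
;; (a hedged sketch; assumes -mcpu=power9 and a 64-bit target):
;;
;;   #include <altivec.h>
;;
;;   unsigned long long exp_bits (double d)
;;   {
;;     return scalar_extract_exp (d);   /* biased exponent, via xsxexpdp */
;;   }
;;
;;   unsigned long long sig_bits (double d)
;;   {
;;     return scalar_extract_sig (d);   /* significand, via xsxsigdp */
;;   }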
5063
5064 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
5065 (define_insn "xsiexpqpf_<mode>"
5066 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5067 (unspec:IEEE128
5068 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5069 (match_operand:DI 2 "altivec_register_operand" "v")]
5070 UNSPEC_VSX_SIEXPQP))]
5071 "TARGET_P9_VECTOR"
5072 "xsiexpqp %0,%1,%2"
5073 [(set_attr "type" "vecmove")])
5074
5075 ;; VSX Scalar Insert Exponent Quad-Precision
5076 (define_insn "xsiexpqp_<mode>"
5077 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5078 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
5079 (match_operand:DI 2 "altivec_register_operand" "v")]
5080 UNSPEC_VSX_SIEXPQP))]
5081 "TARGET_P9_VECTOR"
5082 "xsiexpqp %0,%1,%2"
5083 [(set_attr "type" "vecmove")])
5084
5085 ;; VSX Scalar Insert Exponent Double-Precision
5086 (define_insn "xsiexpdp"
5087 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5088 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
5089 (match_operand:DI 2 "register_operand" "r")]
5090 UNSPEC_VSX_SIEXPDP))]
5091 "TARGET_P9_VECTOR && TARGET_64BIT"
5092 "xsiexpdp %x0,%1,%2"
5093 [(set_attr "type" "fpsimple")])
5094
5095 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
5096 (define_insn "xsiexpdpf"
5097 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5098 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
5099 (match_operand:DI 2 "register_operand" "r")]
5100 UNSPEC_VSX_SIEXPDP))]
5101 "TARGET_P9_VECTOR && TARGET_64BIT"
5102 "xsiexpdp %x0,%1,%2"
5103 [(set_attr "type" "fpsimple")])
5104
5105 ;; VSX Scalar Compare Exponents Double-Precision
5106 (define_expand "xscmpexpdp_<code>"
5107 [(set (match_dup 3)
5108 (compare:CCFP
5109 (unspec:DF
5110 [(match_operand:DF 1 "vsx_register_operand" "wa")
5111 (match_operand:DF 2 "vsx_register_operand" "wa")]
5112 UNSPEC_VSX_SCMPEXPDP)
5113 (const_int 0)))
5114 (set (match_operand:SI 0 "register_operand" "=r")
5115 (CMP_TEST:SI (match_dup 3)
5116 (const_int 0)))]
5117 "TARGET_P9_VECTOR"
5118 {
5119 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
5120 {
5121 emit_move_insn (operands[0], const0_rtx);
5122 DONE;
5123 }
5124
5125 operands[3] = gen_reg_rtx (CCFPmode);
5126 })
5127
5128 (define_insn "*xscmpexpdp"
5129 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5130 (compare:CCFP
5131 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
5132 (match_operand:DF 2 "vsx_register_operand" "wa")]
5133 UNSPEC_VSX_SCMPEXPDP)
5134 (match_operand:SI 3 "zero_constant" "j")))]
5135 "TARGET_P9_VECTOR"
5136 "xscmpexpdp %0,%x1,%x2"
5137 [(set_attr "type" "fpcompare")])
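;; The expander above serves built-ins such as scalar_cmp_exp_eq (a sketch
;; of the documented ISA 3.0 usage; assumes -mcpu=power9):
;;
;;   #include <altivec.h>
;;
;;   int same_exponent (double a, double b)
;;   {
;;     return scalar_cmp_exp_eq (a, b);   /* xscmpexpdp + CR bit test */
;;   }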
5138
5139 ;; VSX Scalar Compare Exponents Quad-Precision
5140 (define_expand "xscmpexpqp_<code>_<mode>"
5141 [(set (match_dup 3)
5142 (compare:CCFP
5143 (unspec:IEEE128
5144 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
5145 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
5146 UNSPEC_VSX_SCMPEXPQP)
5147 (const_int 0)))
5148 (set (match_operand:SI 0 "register_operand" "=r")
5149 (CMP_TEST:SI (match_dup 3)
5150 (const_int 0)))]
5151 "TARGET_P9_VECTOR"
5152 {
5153 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
5154 {
5155 emit_move_insn (operands[0], const0_rtx);
5156 DONE;
5157 }
5158
5159 operands[3] = gen_reg_rtx (CCFPmode);
5160 })
5161
5162 (define_insn "*xscmpexpqp"
5163 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5164 (compare:CCFP
5165 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5166 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
5167 UNSPEC_VSX_SCMPEXPQP)
5168 (match_operand:SI 3 "zero_constant" "j")))]
5169 "TARGET_P9_VECTOR"
5170 "xscmpexpqp %0,%1,%2"
5171 [(set_attr "type" "fpcompare")])
5172
5173 ;; VSX Scalar Test Data Class Quad-Precision
5174 ;; (Expansion for scalar_test_data_class (__ieee128, int))
5175 ;; (Has side effect of setting the lt bit if operand 1 is negative,
5176 ;; setting the eq bit if any of the conditions tested by operand 2
5177 ;; are satisfied, and clearing the gt and unordered bits to zero.)
5178 (define_expand "xststdcqp_<mode>"
5179 [(set (match_dup 3)
5180 (compare:CCFP
5181 (unspec:IEEE128
5182 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5183 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5184 UNSPEC_VSX_STSTDC)
5185 (const_int 0)))
5186 (set (match_operand:SI 0 "register_operand" "=r")
5187 (eq:SI (match_dup 3)
5188 (const_int 0)))]
5189 "TARGET_P9_VECTOR"
5190 {
5191 operands[3] = gen_reg_rtx (CCFPmode);
5192 })
5193
5194 ;; VSX Scalar Test Data Class Double- and Single-Precision
5195 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
5196 ;; if any of the conditions tested by operand 2 are satisfied.
5197 ;; The gt and unordered bits are cleared to zero.)
5198 (define_expand "xststdc<sd>p"
5199 [(set (match_dup 3)
5200 (compare:CCFP
5201 (unspec:SFDF
5202 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5203 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5204 UNSPEC_VSX_STSTDC)
5205 (match_dup 4)))
5206 (set (match_operand:SI 0 "register_operand" "=r")
5207 (eq:SI (match_dup 3)
5208 (const_int 0)))]
5209 "TARGET_P9_VECTOR"
5210 {
5211 operands[3] = gen_reg_rtx (CCFPmode);
5212 operands[4] = CONST0_RTX (SImode);
5213 })
5214
5215 ;; VSX Scalar Test Negative Quad-Precision
5216 (define_expand "xststdcnegqp_<mode>"
5217 [(set (match_dup 2)
5218 (compare:CCFP
5219 (unspec:IEEE128
5220 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5221 (const_int 0)]
5222 UNSPEC_VSX_STSTDC)
5223 (const_int 0)))
5224 (set (match_operand:SI 0 "register_operand" "=r")
5225 (lt:SI (match_dup 2)
5226 (const_int 0)))]
5227 "TARGET_P9_VECTOR"
5228 {
5229 operands[2] = gen_reg_rtx (CCFPmode);
5230 })
5231
5232 ;; VSX Scalar Test Negative Double- and Single-Precision
5233 (define_expand "xststdcneg<sd>p"
5234 [(set (match_dup 2)
5235 (compare:CCFP
5236 (unspec:SFDF
5237 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5238 (const_int 0)]
5239 UNSPEC_VSX_STSTDC)
5240 (match_dup 3)))
5241 (set (match_operand:SI 0 "register_operand" "=r")
5242 (lt:SI (match_dup 2)
5243 (const_int 0)))]
5244 "TARGET_P9_VECTOR"
5245 {
5246 operands[2] = gen_reg_rtx (CCFPmode);
5247 operands[3] = CONST0_RTX (SImode);
5248 })
5249
5250 (define_insn "*xststdcqp_<mode>"
5251 [(set (match_operand:CCFP 0 "" "=y")
5252 (compare:CCFP
5253 (unspec:IEEE128
5254 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5255 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5256 UNSPEC_VSX_STSTDC)
5257 (const_int 0)))]
5258 "TARGET_P9_VECTOR"
5259 "xststdcqp %0,%1,%2"
5260 [(set_attr "type" "fpcompare")])
5261
5262 (define_insn "*xststdc<sd>p"
5263 [(set (match_operand:CCFP 0 "" "=y")
5264 (compare:CCFP
5265 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5266 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5267 UNSPEC_VSX_STSTDC)
5268 (match_operand:SI 3 "zero_constant" "j")))]
5269 "TARGET_P9_VECTOR"
5270 "xststdc<sd>p %0,%x1,%2"
5271 [(set_attr "type" "fpcompare")])
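;; For example, a NaN-or-infinity test with the documented built-in (a
;; sketch; the 0x70 mask is this example's choice: NaN | +inf | -inf):
;;
;;   #include <altivec.h>
;;
;;   int is_nan_or_inf (double d)
;;   {
;;     return scalar_test_data_class (d, 0x70);   /* xststdcdp, eq bit */
;;   }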
5272
5273 ;; VSX Vector Extract Exponent Double and Single Precision
5274 (define_insn "xvxexp<sd>p"
5275 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5276 (unspec:VSX_F
5277 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5278 UNSPEC_VSX_VXEXP))]
5279 "TARGET_P9_VECTOR"
5280 "xvxexp<sd>p %x0,%x1"
5281 [(set_attr "type" "vecsimple")])
5282
5283 ;; VSX Vector Extract Significand Double and Single Precision
5284 (define_insn "xvxsig<sd>p"
5285 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5286 (unspec:VSX_F
5287 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5288 UNSPEC_VSX_VXSIG))]
5289 "TARGET_P9_VECTOR"
5290 "xvxsig<sd>p %x0,%x1"
5291 [(set_attr "type" "vecsimple")])
5292
5293 ;; VSX Vector Insert Exponent Double and Single Precision
5294 (define_insn "xviexp<sd>p"
5295 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5296 (unspec:VSX_F
5297 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5298 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5299 UNSPEC_VSX_VIEXP))]
5300 "TARGET_P9_VECTOR"
5301 "xviexp<sd>p %x0,%x1,%x2"
5302 [(set_attr "type" "vecsimple")])
5303
5304 ;; VSX Vector Test Data Class Double and Single Precision
5305 ;; The corresponding elements of the result vector are all ones
5306 ;; if any of the conditions tested by operand 3 are satisfied.
5307 (define_insn "xvtstdc<sd>p"
5308 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5309 (unspec:<VSI>
5310 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5311 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5312 UNSPEC_VSX_VTSTDC))]
5313 "TARGET_P9_VECTOR"
5314 "xvtstdc<sd>p %x0,%x1,%2"
5315 [(set_attr "type" "vecsimple")])
5316
5317 ;; ISA 3.0 String Operations Support
5318
5319 ;; Compare vectors producing a vector result and a predicate, setting CR6
5320 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
5321 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes; there is no
5322 ;; need to match those modes because they are expanded to use Power8
5323 ;; instructions.
5324 (define_insn "*vsx_ne_<mode>_p"
5325 [(set (reg:CC CR6_REGNO)
5326 (unspec:CC
5327 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5328 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5329 UNSPEC_PREDICATE))
5330 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5331 (ne:VSX_EXTRACT_I (match_dup 1)
5332 (match_dup 2)))]
5333 "TARGET_P9_VECTOR"
5334 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5335 [(set_attr "type" "vecsimple")])
5336
5337 (define_insn "*vector_nez_<mode>_p"
5338 [(set (reg:CC CR6_REGNO)
5339 (unspec:CC [(unspec:VI
5340 [(match_operand:VI 1 "gpc_reg_operand" "v")
5341 (match_operand:VI 2 "gpc_reg_operand" "v")]
5342 UNSPEC_NEZ_P)]
5343 UNSPEC_PREDICATE))
5344 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5345 (unspec:VI [(match_dup 1)
5346 (match_dup 2)]
5347 UNSPEC_NEZ_P))]
5348 "TARGET_P9_VECTOR"
5349 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5350 [(set_attr "type" "vecsimple")])
5351
5352 ;; Return first position of match between vectors using natural order
5353 ;; for both LE and BE execution modes.
5354 (define_expand "first_match_index_<mode>"
5355 [(match_operand:SI 0 "register_operand")
5356 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5357 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5358 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5359 "TARGET_P9_VECTOR"
5360 {
5361 int sh;
5362
5363 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5364 rtx not_result = gen_reg_rtx (<MODE>mode);
5365
5366 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5367 operands[2]));
5368 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5369
5370 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5371
5372 if (<MODE>mode == V16QImode)
5373 {
5374 if (!BYTES_BIG_ENDIAN)
5375 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5376 else
5377 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5378 }
5379 else
5380 {
5381 rtx tmp = gen_reg_rtx (SImode);
5382 if (!BYTES_BIG_ENDIAN)
5383 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5384 else
5385 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5386 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5387 }
5388 DONE;
5389 })
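;; This expander implements the vec_first_match_index built-in, e.g.
;; (a sketch, assuming the documented ISA 3.0 built-in):
;;
;;   #include <altivec.h>
;;
;;   unsigned int first_match (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_first_match_index (a, b);   /* vcmpneb + v{ct,cl}zlsbb */
;;   }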
5390
5391 ;; Return first position of match between vectors or end of string (EOS) using
5392 ;; natural element order for both LE and BE execution modes.
5393 (define_expand "first_match_or_eos_index_<mode>"
5394 [(match_operand:SI 0 "register_operand")
5395    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5396 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5397 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5398 "TARGET_P9_VECTOR"
5399 {
5400 int sh;
5401 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5402 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5403 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5404 rtx and_result = gen_reg_rtx (<MODE>mode);
5405 rtx result = gen_reg_rtx (<MODE>mode);
5406 rtx vzero = gen_reg_rtx (<MODE>mode);
5407
5408 /* Vector with zeros in elements that correspond to zeros in operands. */
5409 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5410 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5411 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5412 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5413
5414   /* Vector with ones in elements that do not match.  */
5415 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5416 operands[2]));
5417
5418   /* Create vector with ones in elements where there was a zero in one of
5419      the source elements or where the elements match.  */
5420 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5421 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5422
5423 if (<MODE>mode == V16QImode)
5424 {
5425 if (!BYTES_BIG_ENDIAN)
5426 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5427 else
5428 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5429 }
5430 else
5431 {
5432 rtx tmp = gen_reg_rtx (SImode);
5433 if (!BYTES_BIG_ENDIAN)
5434 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5435 else
5436 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5437 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5438 }
5439 DONE;
5440 })
5441
5442 ;; Return first position of mismatch between vectors using natural
5443 ;; element order for both LE and BE execution modes.
5444 (define_expand "first_mismatch_index_<mode>"
5445 [(match_operand:SI 0 "register_operand")
5446    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5447 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5448 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5449 "TARGET_P9_VECTOR"
5450 {
5451 int sh;
5452 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5453
5454 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5455 operands[2]));
5456 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5457
5458 if (<MODE>mode == V16QImode)
5459 {
5460 if (!BYTES_BIG_ENDIAN)
5461 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5462 else
5463 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5464 }
5465 else
5466 {
5467 rtx tmp = gen_reg_rtx (SImode);
5468 if (!BYTES_BIG_ENDIAN)
5469 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5470 else
5471 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5472 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5473 }
5474 DONE;
5475 })
5476
5477 ;; Return first position of mismatch between vectors or end of string (EOS)
5478 ;; using natural element order for both LE and BE execution modes.
5479 (define_expand "first_mismatch_or_eos_index_<mode>"
5480 [(match_operand:SI 0 "register_operand")
5481    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5482 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5483 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5484 "TARGET_P9_VECTOR"
5485 {
5486 int sh;
5487 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5488 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5489 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5490 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5491 rtx and_result = gen_reg_rtx (<MODE>mode);
5492 rtx result = gen_reg_rtx (<MODE>mode);
5493 rtx vzero = gen_reg_rtx (<MODE>mode);
5494
5495 /* Vector with zeros in elements that correspond to zeros in operands. */
5496 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5497
5498 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5499 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5500 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5501
5502   /* Vector with ones in elements that match.  */
5503 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5504 operands[2]));
5505 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5506
5507   /* Create vector with ones in elements where there was a zero in one of
5508      the source elements or where the elements did not match.  */
5509 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5510 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5511
5512 if (<MODE>mode == V16QImode)
5513 {
5514 if (!BYTES_BIG_ENDIAN)
5515 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5516 else
5517 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5518 }
5519 else
5520 {
5521 rtx tmp = gen_reg_rtx (SImode);
5522 if (!BYTES_BIG_ENDIAN)
5523 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5524 else
5525 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5526 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5527 }
5528 DONE;
5529 })
5530
5531 ;; Load VSX Vector with Length
5532 (define_expand "lxvl"
5533 [(set (match_dup 3)
5534 (ashift:DI (match_operand:DI 2 "register_operand")
5535 (const_int 56)))
5536 (set (match_operand:V16QI 0 "vsx_register_operand")
5537 (unspec:V16QI
5538 [(match_operand:DI 1 "gpc_reg_operand")
5539 (mem:V16QI (match_dup 1))
5540 (match_dup 3)]
5541 UNSPEC_LXVL))]
5542 "TARGET_P9_VECTOR && TARGET_64BIT"
5543 {
5544 operands[3] = gen_reg_rtx (DImode);
5545 })
5546
5547 (define_insn "*lxvl"
5548 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5549 (unspec:V16QI
5550 [(match_operand:DI 1 "gpc_reg_operand" "b")
5551 (mem:V16QI (match_dup 1))
5552 (match_operand:DI 2 "register_operand" "r")]
5553 UNSPEC_LXVL))]
5554 "TARGET_P9_VECTOR && TARGET_64BIT"
5555 "lxvl %x0,%1,%2"
5556 [(set_attr "type" "vecload")])
5557
5558 (define_insn "lxvll"
5559 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5560 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5561 (mem:V16QI (match_dup 1))
5562 (match_operand:DI 2 "register_operand" "r")]
5563 UNSPEC_LXVLL))]
5564 "TARGET_P9_VECTOR"
5565 "lxvll %x0,%1,%2"
5566 [(set_attr "type" "vecload")])
5567
5568 ;; Expand for builtin xl_len_r
5569 (define_expand "xl_len_r"
5570 [(match_operand:V16QI 0 "vsx_register_operand")
5571 (match_operand:DI 1 "register_operand")
5572 (match_operand:DI 2 "register_operand")]
5573 ""
5574 {
5575 rtx shift_mask = gen_reg_rtx (V16QImode);
5576 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5577 rtx tmp = gen_reg_rtx (DImode);
5578
5579 emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2]));
5580 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5581 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5582 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5583 shift_mask));
5584 DONE;
5585 })
5586
5587 (define_insn "stxvll"
5588 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5589 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5590 (mem:V16QI (match_dup 1))
5591 (match_operand:DI 2 "register_operand" "r")]
5592 UNSPEC_STXVLL))]
5593 "TARGET_P9_VECTOR"
5594 "stxvll %x0,%1,%2"
5595 [(set_attr "type" "vecstore")])
5596
5597 ;; Store VSX Vector with Length
5598 (define_expand "stxvl"
5599 [(set (match_dup 3)
5600 (ashift:DI (match_operand:DI 2 "register_operand")
5601 (const_int 56)))
5602 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5603 (unspec:V16QI
5604 [(match_operand:V16QI 0 "vsx_register_operand")
5605 (mem:V16QI (match_dup 1))
5606 (match_dup 3)]
5607 UNSPEC_STXVL))]
5608 "TARGET_P9_VECTOR && TARGET_64BIT"
5609 {
5610 operands[3] = gen_reg_rtx (DImode);
5611 })
5612
5613 ;; Define the len_load/len_store optabs so the vectorizer can exploit vector
;; accesses with length.
5614 (define_expand "len_load_v16qi"
5615 [(match_operand:V16QI 0 "vlogical_operand")
5616 (match_operand:V16QI 1 "memory_operand")
5617 (match_operand:QI 2 "gpc_reg_operand")
5618 (match_operand:QI 3 "zero_constant")]
5619 "TARGET_P9_VECTOR && TARGET_64BIT"
5620 {
5621 rtx mem = XEXP (operands[1], 0);
5622 mem = force_reg (DImode, mem);
5623 rtx len = gen_lowpart (DImode, operands[2]);
5624 emit_insn (gen_lxvl (operands[0], mem, len));
5625 DONE;
5626 })
5627
5628 (define_expand "len_store_v16qi"
5629 [(match_operand:V16QI 0 "memory_operand")
5630 (match_operand:V16QI 1 "vlogical_operand")
5631 (match_operand:QI 2 "gpc_reg_operand")
5632    (match_operand:QI 3 "zero_constant")]
5634 "TARGET_P9_VECTOR && TARGET_64BIT"
5635 {
5636 rtx mem = XEXP (operands[0], 0);
5637 mem = force_reg (DImode, mem);
5638 rtx len = gen_lowpart (DImode, operands[2]);
5639 emit_insn (gen_stxvl (operands[1], mem, len));
5640 DONE;
5641 })
5642
5643 (define_insn "*stxvl"
5644 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5645 (unspec:V16QI
5646 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5647 (mem:V16QI (match_dup 1))
5648 (match_operand:DI 2 "register_operand" "r")]
5649 UNSPEC_STXVL))]
5650 "TARGET_P9_VECTOR && TARGET_64BIT"
5651 "stxvl %x0,%1,%2"
5652 [(set_attr "type" "vecstore")])
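;; lxvl/stxvl back the variable-length load/store built-ins, e.g. (a sketch,
;; assuming the documented vec_xl_len/vec_xst_len; 64-bit only, n at most 16;
;; the shift by 56 in the expanders above moves n into bits 0:7):
;;
;;   #include <altivec.h>
;;   #include <stddef.h>
;;
;;   void copy_n (unsigned char *dst, unsigned char *src, size_t n)
;;   {
;;     vector unsigned char v = vec_xl_len (src, n);
;;     vec_xst_len (v, dst, n);
;;   }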
5653
5654 ;; Expand for builtin xst_len_r
5655 (define_expand "xst_len_r"
5656 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5657 (match_operand:DI 1 "register_operand" "b")
5658 (match_operand:DI 2 "register_operand" "r")]
5659 "UNSPEC_XST_LEN_R"
5660 {
5661 rtx shift_mask = gen_reg_rtx (V16QImode);
5662 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5663 rtx tmp = gen_reg_rtx (DImode);
5664
5665 emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2]));
5666 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5667 shift_mask));
5668 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5669 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5670 DONE;
5671 })
5672
5673 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5674 (define_insn "vcmpneb"
5675 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5676 (not:V16QI
5677 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5678 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5679 "TARGET_P9_VECTOR"
5680 "vcmpneb %0,%1,%2"
5681 [(set_attr "type" "vecsimple")])
5682
5683 ;; Vector Compare Not Equal v1ti (specified/not+eq:)
5684 (define_expand "vcmpnet"
5685 [(set (match_operand:V1TI 0 "altivec_register_operand")
5686 (not:V1TI
5687 (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand")
5688 (match_operand:V1TI 2 "altivec_register_operand"))))]
5689 "TARGET_POWER10"
5690 {
5691 emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2]));
5692 emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0]));
5693 DONE;
5694 })
5695
5696 ;; Vector Compare Not Equal or Zero Byte
5697 (define_insn "vcmpnezb"
5698 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5699 (unspec:V16QI
5700 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5701 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5702 UNSPEC_VCMPNEZB))]
5703 "TARGET_P9_VECTOR"
5704 "vcmpnezb %0,%1,%2"
5705 [(set_attr "type" "vecsimple")])
5706
5707 ;; Vector Compare Not Equal or Zero Byte predicate (record form)
5708 (define_insn "vcmpnezb_p"
5709 [(set (reg:CC CR6_REGNO)
5710 (unspec:CC
5711 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5712 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5713 UNSPEC_VCMPNEZB))
5714 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5715 (unspec:V16QI
5716 [(match_dup 1)
5717 (match_dup 2)]
5718 UNSPEC_VCMPNEZB))]
5719 "TARGET_P9_VECTOR"
5720 "vcmpnezb. %0,%1,%2"
5721 [(set_attr "type" "vecsimple")])
5722
5723 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5724 (define_insn "vcmpneh"
5725 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5726 (not:V8HI
5727 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5728 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5729 "TARGET_P9_VECTOR"
5730 "vcmpneh %0,%1,%2"
5731 [(set_attr "type" "vecsimple")])
5732
5733 ;; Vector Compare Not Equal or Zero Half Word
5734 (define_insn "vcmpnezh"
5735 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5736 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5737 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5738 UNSPEC_VCMPNEZH))]
5739 "TARGET_P9_VECTOR"
5740 "vcmpnezh %0,%1,%2"
5741 [(set_attr "type" "vecsimple")])
5742
5743 ;; Vector Compare Not Equal Word (specified/not+eq:)
5744 (define_insn "vcmpnew"
5745 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5746 (not:V4SI
5747 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5748 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5749 "TARGET_P9_VECTOR"
5750 "vcmpnew %0,%1,%2"
5751 [(set_attr "type" "vecsimple")])
5752
5753 ;; Vector Compare Not Equal or Zero Word
5754 (define_insn "vcmpnezw"
5755 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5756 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5757 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5758 UNSPEC_VCMPNEZW))]
5759 "TARGET_P9_VECTOR"
5760 "vcmpnezw %0,%1,%2"
5761 [(set_attr "type" "vecsimple")])
5762
5763 ;; Vector Count Leading Zero Least-Significant Bits Byte
5764 (define_insn "vclzlsbb_<mode>"
5765 [(set (match_operand:SI 0 "register_operand" "=r")
5766 (unspec:SI
5767 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5768 UNSPEC_VCLZLSBB))]
5769 "TARGET_P9_VECTOR"
5770 "vclzlsbb %0,%1"
5771 [(set_attr "type" "vecsimple")])
5772
5773 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5774 (define_insn "vctzlsbb_<mode>"
5775 [(set (match_operand:SI 0 "register_operand" "=r")
5776 (unspec:SI
5777 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5778 UNSPEC_VCTZLSBB))]
5779 "TARGET_P9_VECTOR"
5780 "vctzlsbb %0,%1"
5781 [(set_attr "type" "vecsimple")])
5782
5783 ;; Vector Extract Unsigned Byte Left-Indexed
5784 (define_insn "vextublx"
5785 [(set (match_operand:SI 0 "register_operand" "=r")
5786 (unspec:SI
5787 [(match_operand:SI 1 "register_operand" "r")
5788 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5789 UNSPEC_VEXTUBLX))]
5790 "TARGET_P9_VECTOR"
5791 "vextublx %0,%1,%2"
5792 [(set_attr "type" "vecsimple")])
5793
5794 ;; Vector Extract Unsigned Byte Right-Indexed
5795 (define_insn "vextubrx"
5796 [(set (match_operand:SI 0 "register_operand" "=r")
5797 (unspec:SI
5798 [(match_operand:SI 1 "register_operand" "r")
5799 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5800 UNSPEC_VEXTUBRX))]
5801 "TARGET_P9_VECTOR"
5802 "vextubrx %0,%1,%2"
5803 [(set_attr "type" "vecsimple")])
5804
5805 ;; Vector Extract Unsigned Half Word Left-Indexed
5806 (define_insn "vextuhlx"
5807 [(set (match_operand:SI 0 "register_operand" "=r")
5808 (unspec:SI
5809 [(match_operand:SI 1 "register_operand" "r")
5810 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5811 UNSPEC_VEXTUHLX))]
5812 "TARGET_P9_VECTOR"
5813 "vextuhlx %0,%1,%2"
5814 [(set_attr "type" "vecsimple")])
5815
5816 ;; Vector Extract Unsigned Half Word Right-Indexed
5817 (define_insn "vextuhrx"
5818 [(set (match_operand:SI 0 "register_operand" "=r")
5819 (unspec:SI
5820 [(match_operand:SI 1 "register_operand" "r")
5821 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5822 UNSPEC_VEXTUHRX))]
5823 "TARGET_P9_VECTOR"
5824 "vextuhrx %0,%1,%2"
5825 [(set_attr "type" "vecsimple")])
5826
5827 ;; Vector Extract Unsigned Word Left-Indexed
5828 (define_insn "vextuwlx"
5829 [(set (match_operand:SI 0 "register_operand" "=r")
5830 (unspec:SI
5831 [(match_operand:SI 1 "register_operand" "r")
5832 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5833 UNSPEC_VEXTUWLX))]
5834 "TARGET_P9_VECTOR"
5835 "vextuwlx %0,%1,%2"
5836 [(set_attr "type" "vecsimple")])
5837
5838 ;; Vector Extract Unsigned Word Right-Indexed
5839 (define_insn "vextuwrx"
5840 [(set (match_operand:SI 0 "register_operand" "=r")
5841 (unspec:SI
5842 [(match_operand:SI 1 "register_operand" "r")
5843 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5844 UNSPEC_VEXTUWRX))]
5845 "TARGET_P9_VECTOR"
5846 "vextuwrx %0,%1,%2"
5847 [(set_attr "type" "vecsimple")])
5848
5849 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
5850 ;; endian version needs to adjust the byte number and the V4SI element in
5851 ;; insert4b.
5852 (define_insn "extract4b"
5853 [(set (match_operand:V2DI 0 "vsx_register_operand")
5854 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5855 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5856 UNSPEC_XXEXTRACTUW))]
5857 "TARGET_P9_VECTOR"
5858 {
5859 if (!BYTES_BIG_ENDIAN)
5860 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5861
5862 return "xxextractuw %x0,%x1,%2";
5863 })
5864
5865 (define_expand "insert4b"
5866 [(set (match_operand:V16QI 0 "vsx_register_operand")
5867 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5868 (match_operand:V16QI 2 "vsx_register_operand")
5869 (match_operand:QI 3 "const_0_to_12_operand")]
5870 UNSPEC_XXINSERTW))]
5871 "TARGET_P9_VECTOR"
5872 {
5873 if (!BYTES_BIG_ENDIAN)
5874 {
5875 rtx op1 = operands[1];
5876 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5877 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5878 operands[1] = v4si_tmp;
5879 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5880 }
5881 })
5882
5883 (define_insn "*insert4b_internal"
5884 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5885 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5886 (match_operand:V16QI 2 "vsx_register_operand" "0")
5887 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5888 UNSPEC_XXINSERTW))]
5889 "TARGET_P9_VECTOR"
5890 "xxinsertw %x0,%x1,%3"
5891 [(set_attr "type" "vecperm")])
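;; These patterns serve the vec_insert4b/vec_extract4b built-ins, e.g.
;; (a sketch, assuming the documented ISA 3.0 built-ins; the byte offset
;; of 4 here is just this example's choice):
;;
;;   #include <altivec.h>
;;
;;   vector unsigned char put_word (vector signed int src,
;;                                  vector unsigned char dst)
;;   {
;;     return vec_insert4b (src, dst, 4);   /* xxinsertw at byte offset 4 */
;;   }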
5892
5893
5894 ;; Extract four float32 values from the left four elements of an eight-element
5895 ;; vector of float16 values.
5896 (define_expand "vextract_fp_from_shorth"
5897 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5898 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5899 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5900 "TARGET_P9_VECTOR"
5901 {
5902 int i;
5903 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5904 int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
5905
5906 rtx rvals[16];
5907 rtx mask = gen_reg_rtx (V16QImode);
5908 rtx tmp = gen_reg_rtx (V16QImode);
5909 rtvec v;
5910
5911 for (i = 0; i < 16; i++)
5912 if (!BYTES_BIG_ENDIAN)
5913 rvals[i] = GEN_INT (vals_le[i]);
5914 else
5915 rvals[i] = GEN_INT (vals_be[i]);
5916
5917 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5918 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5919 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5920 conversion instruction. */
5921 v = gen_rtvec_v (16, rvals);
5922 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5923 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5924 operands[1], mask));
5925 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5926 DONE;
5927 })
5928
5929 ;; Extract four float32 values from the right four elements of an eight-element
5930 ;; vector of float16 values.
5931 (define_expand "vextract_fp_from_shortl"
5932 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5933 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5934 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5935 "TARGET_P9_VECTOR"
5936 {
5937 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5938 int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
5939
5940 int i;
5941 rtx rvals[16];
5942 rtx mask = gen_reg_rtx (V16QImode);
5943 rtx tmp = gen_reg_rtx (V16QImode);
5944 rtvec v;
5945
5946 for (i = 0; i < 16; i++)
5947 if (!BYTES_BIG_ENDIAN)
5948 rvals[i] = GEN_INT (vals_le[i]);
5949 else
5950 rvals[i] = GEN_INT (vals_be[i]);
5951
5952 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5953 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5954 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5955 conversion instruction. */
5956 v = gen_rtvec_v (16, rvals);
5957 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5958 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5959 operands[1], mask));
5960 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5961 DONE;
5962 })
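;; Both expanders back the vec_extract_fp32_from_short{h,l} built-ins,
;; e.g. (a sketch, assuming the documented ISA 3.0 names):
;;
;;   #include <altivec.h>
;;
;;   vector float left_halves (vector unsigned short v)
;;   {
;;     return vec_extract_fp32_from_shorth (v);   /* vperm + xvcvhpsp */
;;   }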
5963
5964 ;; Support for ISA 3.0 vector byte reverse
5965
5966 ;; Swap all bytes within a vector
5967 (define_insn "p9_xxbrq_v1ti"
5968 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5969 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5970 "TARGET_P9_VECTOR"
5971 "xxbrq %x0,%x1"
5972 [(set_attr "type" "vecperm")])
5973
5974 (define_expand "p9_xxbrq_v16qi"
5975 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5976 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5977 "TARGET_P9_VECTOR"
5978 {
5979 rtx op0 = gen_reg_rtx (V1TImode);
5980 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5981 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5982 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5983 DONE;
5984 })
5985
5986 ;; Swap all bytes in each 64-bit element
5987 (define_insn "p9_xxbrd_v2di"
5988 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5989 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5990 "TARGET_P9_VECTOR"
5991 "xxbrd %x0,%x1"
5992 [(set_attr "type" "vecperm")])
5993
5994 (define_expand "p9_xxbrd_v2df"
5995 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5996 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5997 "TARGET_P9_VECTOR"
5998 {
5999 rtx op0 = gen_reg_rtx (V2DImode);
6000 rtx op1 = gen_lowpart (V2DImode, operands[1]);
6001 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
6002 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
6003 DONE;
6004 })
6005
6006 ;; Swap all bytes in each 32-bit element
6007 (define_insn "p9_xxbrw_v4si"
6008 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
6009 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
6010 "TARGET_P9_VECTOR"
6011 "xxbrw %x0,%x1"
6012 [(set_attr "type" "vecperm")])
6013
6014 (define_expand "p9_xxbrw_v4sf"
6015 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
6016 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
6017 "TARGET_P9_VECTOR"
6018 {
6019 rtx op0 = gen_reg_rtx (V4SImode);
6020 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6021 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
6022 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
6023 DONE;
6024 })
6025
6026 ;; Swap all bytes in each element of the vector
6027 (define_expand "revb_<mode>"
6028 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
6029 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
6030 ""
6031 {
6032 if (TARGET_P9_VECTOR)
6033 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
6034 else
6035 {
6036 if (<MODE>mode == V8HImode)
6037 {
6038 rtx splt = gen_reg_rtx (V8HImode);
6039 emit_insn (gen_altivec_vspltish (splt, GEN_INT (8)));
6040 emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt));
6041 }
6042 else
6043 {
6044 /* Want to have the elements in reverse order relative
6045 to the endian mode in use, i.e. in LE mode, put elements
6046 in BE order. */
6047 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
6048 emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], operands[1],
6049 operands[1], sel));
6050 }
6051 }
6052
6053 DONE;
6054 })
6055
6056 ;; Reversing bytes in vector char is just a NOP.
6057 (define_expand "revb_v16qi"
6058 [(set (match_operand:V16QI 0 "vsx_register_operand")
6059 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
6060 ""
6061 {
6062 emit_move_insn (operands[0], operands[1]);
6063 DONE;
6064 })
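;; The revb expanders implement the vec_revb built-in, e.g. (a sketch,
;; assuming the documented built-in; uses xxbrw on ISA 3.0, vperm or
;; vrlh on earlier systems):
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int byteswap_each (vector unsigned int v)
;;   {
;;     return vec_revb (v);   /* swap the bytes within each element */
;;   }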
6065
6066 ;; Swap all bytes in each 16-bit element
6067 (define_insn "p9_xxbrh_v8hi"
6068 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
6069 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
6070 "TARGET_P9_VECTOR"
6071 "xxbrh %x0,%x1"
6072 [(set_attr "type" "vecperm")])
6073 \f
6074
6075 ;; Operand numbers for the following peephole2
6076 (define_constants
6077 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
6078 (SFBOOL_TMP_VSX 1) ;; vector temporary
6079 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
6080 (SFBOOL_MFVSR_A 3) ;; move to gpr src
6081 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
6082 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
6083 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
6084 (SFBOOL_SHL_D 7) ;; shift left dest
6085 (SFBOOL_SHL_A 8) ;; shift left arg
6086 (SFBOOL_MTVSR_D 9) ;; move to vector dest
6087 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
6088 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
6089 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
6090 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
6091
6092 ;; Attempt to optimize some common GLIBC operations using logical operations to
6093 ;; pick apart SFmode operations. For example, there is code from e_powf.c
6094 ;; after macro expansion that looks like:
6095 ;;
6096 ;; typedef union {
6097 ;; float value;
6098 ;; uint32_t word;
6099 ;; } ieee_float_shape_type;
6100 ;;
6101 ;; float t1;
6102 ;; int32_t is;
6103 ;;
6104 ;; do {
6105 ;; ieee_float_shape_type gf_u;
6106 ;; gf_u.value = (t1);
6107 ;; (is) = gf_u.word;
6108 ;; } while (0);
6109 ;;
6110 ;; do {
6111 ;; ieee_float_shape_type sf_u;
6112 ;; sf_u.word = (is & 0xfffff000);
6113 ;; (t1) = sf_u.value;
6114 ;; } while (0);
6115 ;;
6116 ;;
6117 ;; This would result in two direct move operations (convert to memory format,
6118 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
6119 ;; scalar format). With this peephole, we eliminate the direct move to the
6120 ;; GPR, and instead move the integer mask value to the vector register after a
6121 ;; shift and do the VSX logical operation.
6122
6123 ;; The insns for dealing with SFmode in GPR registers look like:
6124 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
6125 ;;
6126 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
6127 ;;
6128 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
6129 ;;
6130 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
6131 ;;
6132 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
6133 ;;
6134 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
6135
6136 (define_peephole2
6137 [(match_scratch:DI SFBOOL_TMP_GPR "r")
6138 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
6139
6140 ;; MFVSRWZ (aka zero_extend)
6141 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
6142 (zero_extend:DI
6143 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
6144
6145 ;; AND/IOR/XOR operation on int
6146 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
6147 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
6148 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
6149
6150 ;; SLDI
6151 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
6152 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
6153 (const_int 32)))
6154
6155 ;; MTVSRD
6156 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
6157 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
6158
6159 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
6160 /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
6161 to compare registers, when the mode is different. */
6162 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
6163 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
6164 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
6165 && (REG_P (operands[SFBOOL_BOOL_A2])
6166 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
6167 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
6168 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
6169 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
6170 || (REG_P (operands[SFBOOL_BOOL_A2])
6171 && REGNO (operands[SFBOOL_MFVSR_D])
6172 == REGNO (operands[SFBOOL_BOOL_A2])))
6173 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
6174 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
6175 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
6176 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
6177 [(set (match_dup SFBOOL_TMP_GPR)
6178 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
6179 (const_int 32)))
6180
6181 (set (match_dup SFBOOL_TMP_VSX_DI)
6182 (match_dup SFBOOL_TMP_GPR))
6183
6184 (set (match_dup SFBOOL_MTVSR_D_V4SF)
6185 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
6186 (match_dup SFBOOL_TMP_VSX)))]
6187 {
6188 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
6189 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
6190 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
6191 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
6192 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
6193 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
6194
6195 if (CONST_INT_P (bool_a2))
6196 {
6197 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
6198 emit_move_insn (tmp_gpr, bool_a2);
6199 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
6200 }
6201 else
6202 {
6203 int regno_bool_a1 = REGNO (bool_a1);
6204 int regno_bool_a2 = REGNO (bool_a2);
6205 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
6206 ? regno_bool_a2 : regno_bool_a1);
6207 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
6208 }
6209
6210 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
6211 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
6212 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
6213 })
6214
6215 ;; Support signed/unsigned long long to float conversion vectorization.
6216 ;; Note that any_float (pc) here is just for code attribute <su>.
6217 (define_expand "vec_pack<su>_float_v2di"
6218 [(match_operand:V4SF 0 "vfloat_operand")
6219 (match_operand:V2DI 1 "vint_operand")
6220 (match_operand:V2DI 2 "vint_operand")
6221 (any_float (pc))]
6222 "TARGET_VSX"
6223 {
6224 rtx r1 = gen_reg_rtx (V4SFmode);
6225 rtx r2 = gen_reg_rtx (V4SFmode);
6226 emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
6227 emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
6228 rs6000_expand_extract_even (operands[0], r1, r2);
6229 DONE;
6230 })
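;; The vectorizer reaches vec_pack<su>_float_v2di from loops that narrow
;; 64-bit integers to float, e.g. (a plain C sketch):
;;
;;   void narrow (float *restrict out, long long *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];   /* two V2DI inputs pack into one V4SF */
;;   }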
6231
6232 ;; Support float to signed/unsigned long long conversion vectorization.
6233 ;; Note that any_fix (pc) here is just for code attribute <su>.
6234 (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
6235 [(match_operand:V2DI 0 "vint_operand")
6236 (match_operand:V4SF 1 "vfloat_operand")
6237 (any_fix (pc))]
6238 "TARGET_VSX"
6239 {
6240 rtx reg = gen_reg_rtx (V4SFmode);
6241 rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
6242 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
6243 DONE;
6244 })
6245
6246 ;; Note that any_fix (pc) here is just for code attribute <su>.
6247 (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
6248 [(match_operand:V2DI 0 "vint_operand")
6249 (match_operand:V4SF 1 "vfloat_operand")
6250 (any_fix (pc))]
6251 "TARGET_VSX"
6252 {
6253 rtx reg = gen_reg_rtx (V4SFmode);
6254 rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
6255 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
6256 DONE;
6257 })
6258
6259 (define_insn "vsx_<xvcvbf16>"
6260 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
6261 (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
6262 XVCVBF16))]
6263 "TARGET_POWER10"
6264 "<xvcvbf16> %x0,%x1"
6265 [(set_attr "type" "vecfloat")])
6266
6267 (define_insn "vec_mtvsrbmi"
6268 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
6269 (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
6270 UNSPEC_MTVSBM))]
6271 "TARGET_POWER10"
6272 "mtvsrbmi %0,%1"
6273 )
6274
6275 (define_insn "vec_mtvsr_<mode>"
6276 [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
6277 (unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
6278 UNSPEC_MTVSBM))]
6279 "TARGET_POWER10"
6280 "mtvsr<wd>m %0,%1";
6281 [(set_attr "type" "vecsimple")])
6282
6283 (define_insn "vec_cntmb_<mode>"
6284 [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
6285 (unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
6286 (match_operand:QI 2 "const_0_to_1_operand" "n")]
6287 UNSPEC_VCNTMB))]
6288 "TARGET_POWER10"
6289 "vcntmb<wd> %0,%1,%2"
6290 [(set_attr "type" "vecsimple")])
6291
6292 (define_insn "vec_extract_<mode>"
6293 [(set (match_operand:SI 0 "register_operand" "=r")
6294 (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
6295 UNSPEC_VEXTRACT))]
6296 "TARGET_POWER10"
6297 "vextract<wd>m %0,%1"
6298 [(set_attr "type" "vecsimple")])
6299
6300 (define_insn "vec_expand_<mode>"
6301 [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
6302 (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
6303 UNSPEC_VEXPAND))]
6304 "TARGET_POWER10"
6305 "vexpand<wd>m %0,%1"
6306 [(set_attr "type" "vecsimple")])
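;; These mask insns back built-ins such as vec_genbm, vec_cntm,
;; vec_extractm and vec_expandm (a sketch, assuming the documented
;; ISA 3.1 built-ins):
;;
;;   #include <altivec.h>
;;
;;   unsigned long long count_ones (vector unsigned char mask)
;;   {
;;     return vec_cntm (mask, 1);   /* vcntmbb: count elements whose
;;                                     mask bit is 1 */
;;   }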
6307
6308 (define_insn "dives_<mode>"
6309 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6310 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6311 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6312 UNSPEC_VDIVES))]
6313 "TARGET_POWER10"
6314 "vdives<wd> %0,%1,%2"
6315 [(set_attr "type" "vecdiv")
6316 (set_attr "size" "<bits>")])
6317
6318 (define_insn "diveu_<mode>"
6319 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6320 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6321 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6322 UNSPEC_VDIVEU))]
6323 "TARGET_POWER10"
6324 "vdiveu<wd> %0,%1,%2"
6325 [(set_attr "type" "vecdiv")
6326 (set_attr "size" "<bits>")])
6327
6328 (define_insn "div<mode>3"
6329 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6330 (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6331 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6332 "TARGET_POWER10"
6333 "vdivs<wd> %0,%1,%2"
6334 [(set_attr "type" "vecdiv")
6335 (set_attr "size" "<bits>")])
6336
6337 (define_insn "udiv<mode>3"
6338 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6339 (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6340 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6341 "TARGET_POWER10"
6342 "vdivu<wd> %0,%1,%2"
6343 [(set_attr "type" "vecdiv")
6344 (set_attr "size" "<bits>")])
6345
6346 (define_insn "mod<mode>3"
6347 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6348 (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6349 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6350 "TARGET_POWER10"
6351 "vmods<wd> %0,%1,%2"
6352 [(set_attr "type" "vecdiv")
6353 (set_attr "size" "<bits>")])
6354
6355 (define_insn "umod<mode>3"
6356 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6357 (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6358 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6359 "TARGET_POWER10"
6360 "vmodu<wd> %0,%1,%2"
6361 [(set_attr "type" "vecdiv")
6362 (set_attr "size" "<bits>")])
6363
6364 (define_insn "smul<mode>3_highpart"
6365 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6366 (mult:VIlong (ashiftrt
6367 (match_operand:VIlong 1 "vsx_register_operand" "v")
6368 (const_int 32))
6369 (ashiftrt
6370 (match_operand:VIlong 2 "vsx_register_operand" "v")
6371 (const_int 32))))]
6372 "TARGET_POWER10"
6373 "vmulhs<wd> %0,%1,%2"
6374 [(set_attr "type" "veccomplex")])
6375
6376 (define_insn "umul<mode>3_highpart"
6377 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6378 (us_mult:VIlong (ashiftrt
6379 (match_operand:VIlong 1 "vsx_register_operand" "v")
6380 (const_int 32))
6381 (ashiftrt
6382 (match_operand:VIlong 2 "vsx_register_operand" "v")
6383 (const_int 32))))]
6384 "TARGET_POWER10"
6385 "vmulhu<wd> %0,%1,%2"
6386 [(set_attr "type" "veccomplex")])
6387
6388 ;; Vector multiply low double word
6389 (define_insn "mulv2di3"
6390 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
6391 (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v")
6392 (match_operand:V2DI 2 "vsx_register_operand" "v")))]
6393 "TARGET_POWER10"
6394 "vmulld %0,%1,%2"
6395 [(set_attr "type" "veccomplex")])
6396
6397 \f
6398 ;; XXSPLTIW built-in function support
6399 (define_insn "xxspltiw_v4si"
6400 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6401 (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
6402 UNSPEC_XXSPLTIW))]
6403 "TARGET_POWER10"
6404 "xxspltiw %x0,%1"
6405 [(set_attr "type" "vecperm")
6406 (set_attr "prefixed" "yes")])
6407
6408 (define_expand "xxspltiw_v4sf"
6409 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6410 (unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")]
6411 UNSPEC_XXSPLTIW))]
6412 "TARGET_POWER10"
6413 {
6414 long value = rs6000_const_f32_to_i32 (operands[1]);
6415 emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
6416 DONE;
6417 })
6418
6419 (define_insn "xxspltiw_v4sf_inst"
6420 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6421 (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
6422 UNSPEC_XXSPLTIW))]
6423 "TARGET_POWER10"
6424 "xxspltiw %x0,%1"
6425 [(set_attr "type" "vecperm")
6426 (set_attr "prefixed" "yes")])
6427
6428 ;; XXSPLTIDP built-in function support
6429 (define_expand "xxspltidp_v2df"
6430   [(set (match_operand:V2DF 0 "register_operand")
6431 (unspec:V2DF [(match_operand:SF 1 "const_double_operand")]
6432 UNSPEC_XXSPLTIDP))]
6433 "TARGET_POWER10"
6434 {
6435 long value = rs6000_const_f32_to_i32 (operands[1]);
6436 rs6000_emit_xxspltidp_v2df (operands[0], value);
6437 DONE;
6438 })
6439
6440 (define_insn "xxspltidp_v2df_inst"
6441 [(set (match_operand:V2DF 0 "register_operand" "=wa")
6442 (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
6443 UNSPEC_XXSPLTIDP))]
6444 "TARGET_POWER10"
6445 "xxspltidp %x0,%1"
6446 [(set_attr "type" "vecperm")
6447 (set_attr "prefixed" "yes")])
6448
6449 ;; XXSPLTI32DX built-in function support
6450 (define_expand "xxsplti32dx_v4si"
6451 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6452 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6453 (match_operand:QI 2 "u1bit_cint_operand" "n")
6454 (match_operand:SI 3 "s32bit_cint_operand" "n")]
6455 UNSPEC_XXSPLTI32DX))]
6456 "TARGET_POWER10"
6457 {
6458 int index = INTVAL (operands[2]);
6459
6460 if (!BYTES_BIG_ENDIAN)
6461 index = 1 - index;
6462
6463 emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], operands[1],
6464 GEN_INT (index), operands[3]));
6465 DONE;
6466 }
6467 [(set_attr "type" "vecperm")])
6468
6469 (define_insn "xxsplti32dx_v4si_inst"
6470 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6471 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6472 (match_operand:QI 2 "u1bit_cint_operand" "n")
6473 (match_operand:SI 3 "s32bit_cint_operand" "n")]
6474 UNSPEC_XXSPLTI32DX))]
6475 "TARGET_POWER10"
6476 "xxsplti32dx %x0,%2,%3"
6477 [(set_attr "type" "vecperm")
6478 (set_attr "prefixed" "yes")])
6479
6480 (define_expand "xxsplti32dx_v4sf"
6481 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6482 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
6483 (match_operand:QI 2 "u1bit_cint_operand" "n")
6484 (match_operand:SF 3 "const_double_operand" "n")]
6485 UNSPEC_XXSPLTI32DX))]
6486 "TARGET_POWER10"
6487 {
6488 int index = INTVAL (operands[2]);
6489 long value = rs6000_const_f32_to_i32 (operands[3]);
6490 if (!BYTES_BIG_ENDIAN)
6491 index = 1 - index;
6492
6493 emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], operands[1],
6494 GEN_INT (index), GEN_INT (value)));
6495 DONE;
6496 })

(define_insn "xxsplti32dx_v4sf_inst"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
		      (match_operand:QI 2 "u1bit_cint_operand" "n")
		      (match_operand:SI 3 "s32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTI32DX))]
  "TARGET_POWER10"
  "xxsplti32dx %x0,%2,%3"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
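
;; Illustrative use (hedged; assumes the PVIPR vec_splati_ins intrinsic maps
;; to these patterns):
;;   vector float vf2 = vec_splati_ins (vf, 0, 3.0f);
;; which should overwrite word 0 of each doubleword with the image of 3.0f
;; while leaving the other words of the input vector intact.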

;; XXBLEND built-in function support
(define_insn "xxblend_<mode>"
  [(set (match_operand:VM3 0 "register_operand" "=wa")
	(unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa")
		     (match_operand:VM3 2 "register_operand" "wa")
		     (match_operand:VM3 3 "register_operand" "wa")]
		    UNSPEC_XXBLEND))]
  "TARGET_POWER10"
  "xxblendv<VM3_char> %x0,%x1,%x2,%x3"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
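
;; XXBLENDV* selects on the most-significant bit of each element of the
;; third input: where that bit is set the result element comes from
;; operand 2, otherwise from operand 1.  A hedged sketch with the PVIPR
;; vec_blendv intrinsic:
;;   vector signed int r = vec_blendv (a, b, mask);   /* xxblendvw */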

;; XXPERMX built-in function support
(define_expand "xxpermx"
  [(set (match_operand:V2DI 0 "register_operand" "+wa")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
		      (match_operand:V2DI 2 "register_operand" "wa")
		      (match_operand:V16QI 3 "register_operand" "wa")
		      (match_operand:QI 4 "u8bit_cint_operand" "n")]
		     UNSPEC_XXPERMX))]
  "TARGET_POWER10"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_xxpermx_inst (operands[0], operands[1],
				 operands[2], operands[3],
				 operands[4]));
  else
    {
      /* Reverse the byte element indexes by XORing them with 0xFF.
	 Reverse the 32-byte section identifier match by subtracting
	 bits [0:2] of the element from 7.  */
      int value = INTVAL (operands[4]);
      rtx vreg = gen_reg_rtx (V16QImode);

      emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1)));
      emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg));
      value = 7 - value;
      emit_insn (gen_xxpermx_inst (operands[0], operands[2],
				   operands[1], operands[3],
				   GEN_INT (value)));
    }

  DONE;
}
  [(set_attr "type" "vecperm")])
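
;; Worked little-endian example (a sketch): for UIM = 2 the expander splats
;; -1 with xxspltib, XORs the permute control with it to flip the byte
;; indexes, swaps the two vector sources, and emits xxpermx with 7 - 2 = 5,
;; so the big-endian-numbered selector picks the bytes the user asked for
;; in LE element order.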

(define_insn "xxpermx_inst"
  [(set (match_operand:V2DI 0 "register_operand" "+v")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V16QI 3 "register_operand" "v")
		      (match_operand:QI 4 "u3bit_cint_operand" "n")]
		     UNSPEC_XXPERMX))]
  "TARGET_POWER10"
  "xxpermx %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])

;; XXEVAL built-in function support.  Note the %x output modifiers: with
;; "wa" constraints the operands can land in any VSX register, so the full
;; VSR numbers must be printed.
(define_insn "xxeval"
  [(set (match_operand:V2DI 0 "register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
		      (match_operand:V2DI 2 "register_operand" "wa")
		      (match_operand:V2DI 3 "register_operand" "wa")
		      (match_operand:QI 4 "u8bit_cint_operand" "n")]
		     UNSPEC_XXEVAL))]
  "TARGET_POWER10"
  "xxeval %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
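
;; XXEVAL computes an arbitrary three-input bitwise function: at each bit
;; position the three input bits form an index into the 8-bit immediate
;; truth table.  Hedged examples (assuming the immediate's truth-table bits
;; are numbered in big-endian order, which is worth double-checking against
;; the ISA):
;;   vec_ternarylogic (a, b, c, 1)      /* should be a & b & c */
;;   vec_ternarylogic (a, b, c, 0x7f)   /* should be a | b | c */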

;; Construct V1TI by vsx_concat_v2di
(define_split
  [(set (match_operand:V1TI 0 "vsx_register_operand")
	(subreg:V1TI
	 (match_operand:TI 1 "int_reg_operand") 0))]
  "TARGET_P9_VECTOR && !reload_completed"
  [(const_int 0)]
{
  rtx tmp1 = simplify_gen_subreg (DImode, operands[1], TImode, 0);
  rtx tmp2 = simplify_gen_subreg (DImode, operands[1], TImode, 8);
  rtx tmp3 = gen_reg_rtx (V2DImode);
  emit_insn (gen_vsx_concat_v2di (tmp3, tmp1, tmp2));
  rtx tmp4 = simplify_gen_subreg (V1TImode, tmp3, V2DImode, 0);
  emit_move_insn (operands[0], tmp4);
  DONE;
})
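
;; Rationale (a sketch): routing the move through vsx_concat_v2di lets the
;; two 64-bit halves of the TImode GPR pair be glued together directly in a
;; vector register (mtvsrdd on ISA 3.0), instead of pushing the 128-bit
;; value through memory.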

;; vmsumcud
(define_insn "vmsumcud"
  [(set (match_operand:V1TI 0 "register_operand" "+v")
	(unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V1TI 3 "register_operand" "v")]
		     UNSPEC_VMSUMCUD))]
  "TARGET_POWER10"
  "vmsumcud %0,%1,%2,%3"
  [(set_attr "type" "veccomplex")])
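
;; Semantics sketch (hedged): vmsumcud forms the two unsigned 64 x 64 -> 128
;; bit products of the doubleword elements, adds them and the 128-bit value
;; in operand 3, and returns the carry out of that sum; the PVIPR vec_msumc
;; intrinsic is assumed to map here:
;;   vector unsigned __int128 c = vec_msumc (a, b, acc);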