;; VSX patterns.
;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

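;; A define_mode_iterator instantiates each pattern that uses it once per
;; mode in the list, substituting <MODE>/<mode> and the mode attributes
;; defined below per instance.  For example, "*vsx_le_perm_load_<mode>"
;; written over VSX_D produces "*vsx_le_perm_load_v2df" and
;; "*vsx_le_perm_load_v2di".
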
;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI  "h")
                            (V4SI  "w")
                            (V4SF  "w")
                            (V2DF  "d")
                            (V2DI  "d")
                            (V1TI  "q")])

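;; The suffixes above select among the ISA 3.0 byte-reverse mnemonics: a
;; template written as "xxbr<VSX_XXBR> %x0,%x1" over the VEC_REVB iterator
;; (defined below) assembles to xxbrh, xxbrw, xxbrd, or xxbrq, matching the
;; element width of the mode.
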
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm  [(V16QI "vw4")
                        (V8HI  "vw4")
                        (V4SI  "vw4")
                        (V4SF  "vw4")
                        (V2DF  "vd2")
                        (V2DI  "vd2")
                        (DF    "d")
                        (TF    "vd2")
                        (KF    "vd2")
                        (V1TI  "vd2")
                        (TI    "vd2")])

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs  [(V16QI "sp")
                        (V8HI  "sp")
                        (V4SI  "sp")
                        (V4SF  "sp")
                        (V2DF  "dp")
                        (V2DI  "dp")
                        (DF    "dp")
                        (SF    "sp")
                        (TF    "dp")
                        (KF    "dp")
                        (V1TI  "dp")
                        (TI    "dp")])

;; Map the register class used
(define_mode_attr VSr  [(V16QI "v")
                        (V8HI  "v")
                        (V4SI  "v")
                        (V4SF  "wf")
                        (V2DI  "wd")
                        (V2DF  "wd")
                        (DI    "wi")
                        (DF    "ws")
                        (SF    "ww")
                        (TF    "wp")
                        (KF    "wq")
                        (V1TI  "v")
                        (TI    "wt")])

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the data
(define_mode_attr VSr2 [(V2DF  "wd")
                        (V4SF  "wf")
                        (DF    "ws")
                        (SF    "ww")
                        (DI    "wi")
                        (KF    "wq")
                        (TF    "wp")])

(define_mode_attr VSr3 [(V2DF  "wa")
                        (V4SF  "wa")
                        (DF    "ws")
                        (SF    "ww")
                        (DI    "wi")
                        (KF    "wq")
                        (TF    "wp")])

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF    "ws")
                        (DF    "f")
                        (V2DF  "wd")
                        (V4SF  "v")])

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF    "ws")
                        (DF    "f")
                        (V2DF  "v")
                        (V4SF  "wd")])

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
;; allocated first).
(define_mode_attr VSa  [(V16QI "wa")
                        (V8HI  "wa")
                        (V4SI  "wa")
                        (V4SF  "wa")
                        (V2DI  "wa")
                        (V2DF  "wa")
                        (DI    "wi")
                        (DF    "ws")
                        (SF    "ww")
                        (V1TI  "wa")
                        (TI    "wt")
                        (TF    "wp")
                        (KF    "wq")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r  [(V16QI "??r")
                        (V8HI  "??r")
                        (V4SI  "??r")
                        (V4SF  "??r")
                        (V2DI  "??r")
                        (V2DF  "??r")
                        (V1TI  "??r")
                        (KF    "??r")
                        (TF    "??r")
                        (TI    "r")])
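;; In a constraint string, each "?" slightly disparages an alternative to
;; the register allocator.  <??r> therefore makes the GPR alternative a
;; last resort for the vector and float128 modes, while TImode keeps a
;; plain "r" since it lives naturally in GPRs.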

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF  "v4si")
                       (V2DF  "v2di")
                       (DF    "di")])

(define_mode_attr VSI [(V4SF  "V4SI")
                       (V2DF  "V2DI")
                       (DF    "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv  [(V16QI "v")
                        (V8HI  "v")
                        (V4SI  "v")
                        (V4SF  "v")
                        (V2DI  "v")
                        (V2DF  "v")
                        (V1TI  "v")
                        (DF    "s")
                        (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
                                   (V4SF "fp_addsub_s")
                                   (DF   "fp_addsub_d")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul  [(V2DF "vecdouble")
                               (V4SF "vecfloat")
                               (DF   "dmul")])

(define_mode_attr VSfptype_mul  [(V2DF "fp_mul_d")
                                 (V4SF "fp_mul_s")
                                 (DF   "fp_mul_d")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div  [(V2DF "vecdiv")
                               (V4SF "vecfdiv")
                               (DF   "ddiv")])

(define_mode_attr VSfptype_div  [(V2DF "fp_div_d")
                                 (V4SF "fp_div_s")
                                 (DF   "fp_div_d")])

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
;; the scalar sqrt
(define_mode_attr VStype_sqrt  [(V2DF "dsqrt")
                                (V4SF "ssqrt")
                                (DF   "dsqrt")])

(define_mode_attr VSfptype_sqrt  [(V2DF "fp_sqrt_d")
                                  (V4SF "fp_sqrt_s")
                                  (DF   "fp_sqrt_d")])

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

(define_mode_attr VS_spdp_res [(DF    "V4SF")
                               (V4SF  "V2DF")
                               (V2DF  "V4SF")])

(define_mode_attr VS_spdp_insn [(DF    "xscvdpsp")
                                (V4SF  "xvcvspdp")
                                (V2DF  "xvcvdpsp")])

(define_mode_attr VS_spdp_type [(DF    "fp")
                                (V4SF  "vecdouble")
                                (V2DF  "vecdouble")])
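
;; Together, VSX_SPDP and the three attributes above let a single pattern
;; cover all of the sp<->dp conversions; the V2DF instance, for example,
;; becomes an xvcvdpsp insn producing a V4SF result with insn type
;; "vecdouble".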

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI  "V8SI")
                             (V4SF  "V8SF")
                             (V2DI  "V4DI")
                             (V2DF  "V4DF")
                             (V1TI  "V2TI")])

;; Map register class for 64-bit element in 128-bit vector for direct moves
;; to/from gprs
(define_mode_attr VS_64dm [(V2DF  "wk")
                           (V2DI  "wj")])

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF  "ws")
                            (V2DI  "wi")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI  "h")
                                     (V4SI  "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double to
;; optimize converting to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

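;; The two attributes above are used together when splatting an integer
;; kept in a GPR: once the value has been copied into both doublewords of
;; a vector register (e.g. with mtvsrdd), a vspltb/vsplth chosen by
;; <VSX_SPLAT_SUFFIX> replicates element <VSX_SPLAT_COUNT>, the rightmost
;; element of the first doubleword in big-endian numbering, across the
;; whole register.  The exact sequence is an illustration, not a promise
;; of what the splat patterns emit.
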
;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_CVSPSXDS
   UNSPEC_VSX_CVSPUXDS
   UNSPEC_VSX_CVSXWSP
   UNSPEC_VSX_CVUXWSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_XVCVSXDDP
   UNSPEC_VSX_XVCVUXDDP
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCDPSP
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VEC_INIT
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
         (match_dup 2)
         (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

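;; On little endian without ISA 3.0, the split above typically assembles
;; to lxvd2x followed by xxpermdi %x0,%x0,%x0,2: lxvd2x loads the two
;; doublewords in swapped order and the xxpermdi swaps them back into
;; element order, which is why the insn carries a "length" of 8 (two
;; 4-byte instructions).
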
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
         (match_dup 2)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
         (match_dup 1)
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
         (match_dup 2)
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
         (match_dup 1)
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
         (match_dup 2)
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
         (match_dup 2)
         (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 1) (const_int 0)])))]
  "")

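;; Because the source register may still be live after the store, the
;; post-reload split swaps it in place, performs the swapping store, and
;; swaps it back, giving a three-instruction sequence such as
;; xxpermdi %x1,%x1,%x1,2; stxvd2x %x1,%y0; xxpermdi %x1,%x1,%x1,2
;; (hence the "length" of 12 on the store insn above).
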
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
         (match_dup 2)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
         (match_dup 1)
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
         (match_dup 1)
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
         (match_dup 2)
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
         (match_dup 1)
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
         (match_dup 1)
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
         (match_dup 1)
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
         (match_dup 1)
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
         (match_dup 2)
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
         (match_dup 1)
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
         (match_dup 1)
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
         (match_dup 1)
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

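;; The peepholes above recognize a rotate-by-64 whose result is consumed
;; only by a second rotate-by-64 through a dead (or identical) register;
;; the two half-swaps cancel, so the pair collapses into a plain load or
;; store of the TImode value.
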
;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])

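;; As a concrete example, loading a V4SI whose four elements are all -5 is
;; an xxspltib_constant_split case: the split emits an xxspltib that
;; replicates 0xfb into every byte of a V16QI temporary, then uses the
;; vsx_sign_extend_qi_v4si pattern to sign-extend each byte lane to a
;; full word.
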
;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's
;; or all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a
;; slow instruction).  But generate XXLXOR/XXLORC if it will avoid a
;; register move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
(define_insn "*vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,    r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wo,        v,
                ?<VSa>,    *r,        v,        ??r,       wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,    we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      jwM,       W,        W,         v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         *,        vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,         8,        4,         8,
                8,         8,         8,         8,        4,         4,
                4,         8,         20,        20,       4,         4")])

;;              VSX store  VSX load   VSX move   GPR load  GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1  VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     ??r,      ??Y,       <??r>,
                wo,        v,         ?<VSa>,    *r,       v,         ??r,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     Y,        r,         r,
                wE,        jwM,       ?jwM,      jwM,      W,         W,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,     store,    *,
                vecsimple, vecsimple, vecsimple, *,        *,        *,
                vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,         16,       16,       16,
                4,         4,         4,         16,       20,       32,
                4,         4")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

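;; On little endian, a bare lxvd2x already loads the two doublewords in
;; the opposite order from memory, which is exactly the vec_select written
;; above; no corrective permute is emitted, so these patterns expose the
;; raw element-reversed behavior directly to the builtins.
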
(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
                                                 subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
        (vec_select:V1TI
          (match_operand:V1TI 1 "vsx_register_operand" "+wa")
          (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
        (vec_select:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
        (vec_select:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
        (vec_select:V4SI
          (match_operand:V4SI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
                                                operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
        (vec_select:V2DI
          (match_operand:V2DI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
                                                 operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 15) (const_int 14)
                     (const_int 13) (const_int 12)
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_mul>")])

2ccdda19
BS
1636; Emulate vector with scalar for vec_mul in V2DImode
1637(define_insn_and_split "vsx_mul_v2di"
1638 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1639 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1640 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1641 UNSPEC_VSX_MULSD))]
1642 "VECTOR_MEM_VSX_P (V2DImode)"
1643 "#"
3cb8ee5c 1644 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
2ccdda19 1645 [(const_int 0)]
2ccdda19
BS
1646{
1647 rtx op0 = operands[0];
1648 rtx op1 = operands[1];
1649 rtx op2 = operands[2];
1650 rtx op3 = gen_reg_rtx (DImode);
1651 rtx op4 = gen_reg_rtx (DImode);
1652 rtx op5 = gen_reg_rtx (DImode);
1653 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1654 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
da86c81e
PB
1655 if (TARGET_POWERPC64)
1656 emit_insn (gen_muldi3 (op5, op3, op4));
1657 else
1658 {
1659 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1660 emit_move_insn (op5, ret);
1661 }
2ccdda19
BS
1662 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1663 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
da86c81e
PB
1664 if (TARGET_POWERPC64)
1665 emit_insn (gen_muldi3 (op3, op3, op4));
1666 else
1667 {
1668 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1669 emit_move_insn (op3, ret);
1670 }
2ccdda19 1671 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
d5e6e133 1672 DONE;
6c332313 1673}
2ccdda19
BS
1674 [(set_attr "type" "mul")])
1675
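;; Illustration (added commentary, not original): an element-wise C
;; equivalent of the vsx_mul_v2di split above.  There is no V2DI multiply
;; instruction here, so each doubleword is extracted, multiplied as a
;; scalar, and the two results are glued back together:
;;
;;   vector long long
;;   mul_v2di (vector long long a, vector long long b)
;;   {
;;     vector long long r;
;;     r[0] = a[0] * b[0];   /* vsx_extract_v2di + muldi3 (or expand_mult
;;                              when !TARGET_POWERPC64).  */
;;     r[1] = a[1] * b[1];
;;     return r;             /* vsx_concat_v2di.  */
;;   }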
(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")
   (set_attr "fp_type" "<VSfptype_div>")])

;; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

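;; Added note (illustration only): the signed and unsigned V2DI divides
;; above follow the same element-by-element scheme as vsx_mul_v2di, except
;; that the 32-bit fallback is a libcall (__divdi3/__udivdi3 located via
;; optab_libfunc) rather than an inline expansion.  Conceptually:
;;
;;   r[0] = a[0] / b[0];   /* divdi3/udivdi3 on 64-bit targets.  */
;;   r[1] = a[1] / b[1];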
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
                      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (neg:VSX_F
         (abs:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")
   (set_attr "fp_type" "<VSfptype_sqrt>")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allow the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiply.

(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (fma:V4SF
          (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
          (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
          (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (fma:V2DF
          (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
          (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
          (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

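;; Worked example (added commentary, not in the original file): the "a" and
;; "m" forms exist because the VSX FMA instructions overwrite one of their
;; inputs.  For d = a * b + c in V2DFmode:
;;
;;   xvmaddadp %x0,%x1,%x2   ; needs c (the addend) tied to the target
;;   xvmaddmdp %x0,%x1,%x3   ; needs b (a multiplicand) tied to the target
;;
;; so register allocation picks whichever alternative lets it reuse an
;; input register as the destination.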
(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (neg:VSX_F
            (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
        (neg:VSX_F
         (fma:VSX_F
          (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
          (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")
   (set_attr "fp_type" "<VSfptype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
        (neg:V4SF
         (fma:V4SF
           (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
           (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
           (neg:V4SF
             (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
        (neg:V2DF
         (fma:V2DF
           (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
           (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
           (neg:V2DF
             (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (eq:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (gt:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC CR6_REGNO)
        (unspec:CC
         [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
                 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
         UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (ge:VSX_F (match_dup 1)
                  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Vector select
(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])

(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (if_then_else:VSX_L
         (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")])

;; Copy sign
(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
          (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")]
         UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<VSs> %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; For the conversions, limit the register class for the integer value to be
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
;; in allowing virtual registers.
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
        (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
        (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Math rounding functions
(define_insn "vsx_x<VSv>r<VSs>i"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>i %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_x<VSv>r<VSs>ic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>ic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<VSs>iz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>im %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
                      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<VSs>ip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

\f
;; VSX convert to/from double vector

;; Convert between single and double precision
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the altivec registers, since we likely will need to do a vperm
(define_insn "vsx_<VS_spdp_insn>"
  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
        (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
                              UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "<VS_spdp_insn> %x0,%x1"
  [(set_attr "type" "<VS_spdp_type>")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
        (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

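;; Added example (illustration only, not original text): the _scale
;; expanders implement the two-operand forms of vec_ctf, which converts and
;; then divides by 2**n.  Roughly:
;;
;;   vector double d = vec_ctf ((vector signed long long) {16, 48}, 4);
;;   /* xvcvsxddp followed by rs6000_scale_v2df (d, d, -4), i.e. a
;;      multiply by 2**-4, giving {1.0, 3.0}.  */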
(define_insn "vsx_xvcvsxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvuxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVUXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

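;; Added example (illustration only): the float-to-int direction scales
;; first and then truncates, matching vec_cts (v, n) == trunc (v * 2**n):
;;
;;   vector signed long long i = vec_cts ((vector double) {1.5, -2.25}, 2);
;;   /* rs6000_scale_v2df by 2**2, then xvcvdpsxds, giving {6, -9}.  */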
;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit signed integer
(define_insn "vsx_xvcvdpsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit signed integer
(define_insn "vsx_xvcvspsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspsxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integer
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit unsigned integer
(define_insn "vsx_xvcvspuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspuxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdpuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVDPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_XVCDPSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 32-bit to 64-bit types
;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVSXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
                     UNSPEC_VSX_CVUXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
                     UNSPEC_VSX_CVSPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvspuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVSXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvsxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVUXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvuxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; Generate float2_v2df
;; convert two vectors of double to a vector of float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two long long signed ints to float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two long long unsigned ints to float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand "floate<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      /* Shift left one word to put the even words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));

  DONE;
})

;; Generate uns_floate
;; convert long long unsigned to float
;; (Only even words are valid, BE numbering)
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      /* Shift left one word to put the even words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));

  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
                 (match_operand:V2DF 2 "register_operand" "wa")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3 0 1 2 3 => 1 2 3 0.
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vsignede_v2df
;; signed double float to int even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3 0 1 2 3 => 3 0 1 2.
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
                 (match_operand:V2DF 2 "register_operand" "v")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3 0 1 2 3 => 1 2 3 0.
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Generate vunsignede_v2df
;; unsigned double float to int even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3 0 1 2 3 => 3 0 1 2.
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
}
  [(set_attr "type" "veccomplex")])

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (float:V2DF
         (fix:V2DI
          (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")
   (set_attr "fp_type" "fp_addsub_d")])

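;; Added illustration (not original commentary): under -ffast-math this
;; lets a per-element round trip such as
;;
;;   d[i] = (double) (long long) d[i];
;;
;; collapse into a single xvrdpiz round-toward-zero, which is only valid
;; when out-of-range values and FP traps can be ignored.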
\f
;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
         (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
            ? "xxpermdi %x0,%x1,%x2,0"
            : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
            ? "mtvsrdd %x0,%1,%2"
            : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2 * dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])

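;; Worked example (added, not original): for the _3 combiner pattern on big
;; endian, selecting doubleword 1 of operand 1 and doubleword 0 of operand 3
;; yields the immediate 2*1 + 0 = 2, i.e.
;;
;;   xxpermdi %x0,%x1,%x3,2
;;
;; On little endian the doubleword numbers are complemented and the two
;; source operands are swapped to produce the same memory-order result.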
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF
         [(match_operand:SF 1 "vsx_register_operand" "ww")
          (match_operand:SF 2 "vsx_register_operand" "ww")]
         UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; V4SImode initialization splitter
(define_insn_and_split "vsx_init_v4si"
  [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
        (unspec:V4SI
         [(match_operand:SI 1 "reg_or_cint_operand" "rn")
          (match_operand:SI 2 "reg_or_cint_operand" "rn")
          (match_operand:SI 3 "reg_or_cint_operand" "rn")
          (match_operand:SI 4 "reg_or_cint_operand" "rn")]
         UNSPEC_VSX_VEC_INIT))
   (clobber (match_scratch:DI 5 "=&r"))
   (clobber (match_scratch:DI 6 "=&r"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_v4si_init (operands);
  DONE;
})

;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_D
          (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
          (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
          (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_xxpermdi16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "vsx_register_operand" "wa")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_D
          (match_operand:VSX_D 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (vec_select:VSX_W
          (match_operand:VSX_W 1 "memory_operand" "Z")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

3113;; stxvd2x for little endian stores. We need several of
3114;; these since the form of the PARALLEL differs by mode.
3115(define_insn "*vsx_stxvd2x2_le_<mode>"
6579b156
BS
3116 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3117 (vec_select:VSX_D
3118 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
0cf68694 3119 (parallel [(const_int 1) (const_int 0)])))]
5d57fdc1 3120 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
0cf68694
BS
3121 "stxvd2x %x1,%y0"
3122 [(set_attr "type" "vecstore")])
3123
3124(define_insn "*vsx_stxvd2x4_le_<mode>"
3125 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3126 (vec_select:VSX_W
59f5868d 3127 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
0cf68694
BS
3128 (parallel [(const_int 2) (const_int 3)
3129 (const_int 0) (const_int 1)])))]
5d57fdc1 3130 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
0cf68694
BS
3131 "stxvd2x %x1,%y0"
3132 [(set_attr "type" "vecstore")])
3133
3134(define_insn "*vsx_stxvd2x8_le_V8HI"
3135 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3136 (vec_select:V8HI
3137 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3138 (parallel [(const_int 4) (const_int 5)
3139 (const_int 6) (const_int 7)
3140 (const_int 0) (const_int 1)
3141 (const_int 2) (const_int 3)])))]
5d57fdc1 3142 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
0cf68694
BS
3143 "stxvd2x %x1,%y0"
3144 [(set_attr "type" "vecstore")])
3145
3146(define_insn "*vsx_stxvd2x16_le_V16QI"
3147 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3148 (vec_select:V16QI
3149 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3150 (parallel [(const_int 8) (const_int 9)
3151 (const_int 10) (const_int 11)
3152 (const_int 12) (const_int 13)
3153 (const_int 14) (const_int 15)
3154 (const_int 0) (const_int 1)
3155 (const_int 2) (const_int 3)
3156 (const_int 4) (const_int 5)
3157 (const_int 6) (const_int 7)])))]
5d57fdc1 3158 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
0cf68694
BS
3159 "stxvd2x %x1,%y0"
3160 [(set_attr "type" "vecstore")])
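
;; Note (illustrative): without ISA 3.0, a little endian vector load is
;; normally emitted as the pair
;;	lxvd2x vs0,0,r9		; doublewords arrive swapped
;;	xxpermdi vs0,vs0,vs0,2	; put them back in element order
;; which the lxvd2x/stxvd2x and xxpermdi patterns above recognize; the
;; power8 swap-optimization pass can often delete the permutes again.
;; ISA 3.0 has true little endian element loads, hence !TARGET_P9_VECTOR.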

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})
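
;; For illustration: setting element 0 of a V2DF, as in
;;	vec_insert (x, v, 0)
;; becomes "extract the other element, then concatenate":
;;	tmp  = vec_extract (v, 1);
;;	dest = { x, tmp };
;; so no dedicated set instruction is needed for the 64-bit modes.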

;; Extract a DF/DI element from V2DF/V2DI.
;; Optimize cases where we can do a simple or direct move,
;; or see if we can avoid doing the move at all.

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.

(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
	(vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
	  (parallel
	   [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
	return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
	       && TARGET_POWERPC64)
	return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
	return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
	return "xxlor %x0,%x1,%x1";

      else
	gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
	fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])
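
;; Note on the DM computation above (illustrative): the scalar lives in
;; doubleword 0 of the register, so only the first xxpermdi selector
;; matters.  fldDM = element << 1 places the chosen doubleword there
;; (DM=0 selects element 0, DM=2 selects element 1); "3 - fldDM"
;; accounts for the swapped doublewords on little endian.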

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
	(vec_select:<VSX_D:VS_scalar>
	  (match_operand:VSX_D 1 "memory_operand" "m,m")
	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is already in the right
;; location to memory
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
	(vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
	  (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsd%U0x %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "length" "4")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
			    UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
			     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
	(vec_select:SF
	  (match_operand:V4SF 1 "vsx_register_operand" "wa")
	  (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
	op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])
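
;; Illustrative example of the split above: extracting element 2 of a
;; V4SF on big endian (or element 1 on little endian, where ele becomes
;; 3 - 1 = 2) turns into
;;	xxsldwi vs0,vs1,vs1,2	; rotate the wanted word to word 0
;;	xscvspdp vs0,vs0	; widen it to the scalar (double) format
;; while element 0 needs only the conversion.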

(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
	(vec_select:SF
	  (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")])

;; Variable V4SF extract
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
	(unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
		    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
		   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})

(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
	(vec_select:VSX_D
	  (vec_concat:<VS_double>
	    (match_operand:VSX_D 1 "vsx_register_operand" "wd")
	    (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
		     (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])
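
;; Worked example (illustrative): on big endian, selectors (0, 3) pick
;; doubleword 0 of operand 1 and doubleword 1 of operand 2, giving
;; mask = (0 << 1) | (3 - 2) = 1, i.e. "xxpermdi %x0,%x1,%x2,1".
;; On little endian the same RTL yields op3 = 3 - 3 = 0 and
;; op4 = 3 - 0 = 3, so mask = 1 again, but the source operands are
;; swapped in the emitted instruction.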

;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
(define_expand "vsx_extract_<mode>"
  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
		   (vec_select:<VS_scalar>
		     (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
		     (parallel [(match_operand:QI 2 "const_int_operand")])))
	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
  if (TARGET_P9_VECTOR)
    {
      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
					    operands[2]));
      DONE;
    }
})

(define_insn "vsx_extract_<mode>_p9"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
	  (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
  if (which_alternative == 0)
    return "#";

  else
    {
      HOST_WIDE_INT elt = INTVAL (operands[2]);
      HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
			       : elt);

      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
      HOST_WIDE_INT offset = unit_size * elt_adj;

      operands[2] = GEN_INT (offset);
      if (unit_size == 4)
	return "xxextractuw %x0,%x1,%2";
      else
	return "vextractu<wd> %0,%1,%2";
    }
}
  [(set_attr "type" "vecsimple")])

(define_split
  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
	(vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
	  (parallel [(match_operand:QI 2 "const_int_operand")])))
   (clobber (match_operand:SI 3 "int_reg_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
  [(const_int 0)]
{
  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);

  emit_move_insn (op3, GEN_INT (offset));
  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
  else
    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
  DONE;
})

;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(zero_extend:DI
	  (vec_select:<VS_scalar>
	    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
	    (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
		   (vec_select:<VS_scalar>
		     (match_dup 1)
		     (parallel [(match_dup 2)])))
	      (clobber (match_dup 3))])]
{
  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
})

;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
	(vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
   (clobber (match_scratch:SI 4 "=X,&r"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VS_scalar>
		     (match_dup 1)
		     (parallel [(match_dup 2)])))
	      (clobber (match_dup 4))])
   (set (match_dup 0)
	(match_dup 3))])

(define_insn_and_split "*vsx_extract_si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
	(vec_select:SI
	  (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 1)
    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
  else
    vec_tmp = src;

  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
	dest = rs6000_address_for_fpconvert (dest);

      if (TARGET_P8_VECTOR)
	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
      else
	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
    }

  else if (TARGET_P8_VECTOR)
    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
  else
    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		    gen_rtx_REG (DImode, REGNO (vec_tmp)));

  DONE;
}
  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_extract_<mode>_p8"
  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
	(vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (<MODE>mode == V16QImode)
    {
      if (value != 7)
	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else if (<MODE>mode == V8HImode)
    {
      if (value != 3)
	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else
    gcc_unreachable ();

  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
  DONE;
}
  [(set_attr "type" "mftgpr")])

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
	(vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
	  (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])

;; Variable V16QI/V8HI/V4SI extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
	(unspec:<VS_scalar>
	 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
	 UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

(define_insn_and_split "*vsx_extract_<VSX_EXTRACT_I:mode>_<SDI:mode>_var"
  [(set (match_operand:SDI 0 "gpc_reg_operand" "=r,r,r")
	(zero_extend:SDI
	  (unspec:<VSX_EXTRACT_I:VS_scalar>
	   [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
	    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
	   UNSPEC_VSX_EXTRACT)))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  machine_mode smode = <VSX_EXTRACT_I:MODE>mode;
  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
				operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
(define_insn_and_split "*vsx_extract_si_<uns>float_df"
  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
	(any_float:DF
	  (vec_select:SI
	    (match_operand:V4SI 1 "gpc_reg_operand" "v")
	    (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
  DONE;
})

;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type supported by the hardware that is
;; not double.  First convert the value to double, and then to the desired
;; type.
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
	(any_float:VSX_EXTRACT_FL
	  (vec_select:SI
	    (match_operand:V4SI 1 "gpc_reg_operand" "v")
	    (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))
   (clobber (match_scratch:DF 4 "=ws"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  rtx df_tmp = operands[4];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  if (GET_CODE (df_tmp) == SCRATCH)
    df_tmp = gen_reg_rtx (DFmode);

  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));

  if (<MODE>mode == SFmode)
    emit_insn (gen_truncdfsf2 (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
	   && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
    emit_insn (gen_extenddfif2 (dest, df_tmp));
  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
  else
    gcc_unreachable ();

  DONE;
})

;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
;; vector short or vector unsigned short.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(float:FL_CONV
	  (vec_select:<VSX_EXTRACT_I:VS_scalar>
	    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	    (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		     (match_dup 1)
		     (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 4)
	(sign_extend:DI (match_dup 3)))
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(unsigned_float:FL_CONV
	  (vec_select:<VSX_EXTRACT_I:VS_scalar>
	    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	    (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		     (match_dup 1)
		     (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
	(unspec:VSX_EXTRACT_I
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
{
  int ele = INTVAL (operands[3]);
  int nunits = GET_MODE_NUNITS (<MODE>mode);

  if (!VECTOR_ELT_ORDER_BIG)
    ele = nunits - 1 - ele;

  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
  if (<MODE>mode == V4SImode)
    return "xxinsertw %x0,%x2,%3";
  else
    return "vinsert<wd> %0,%2,%3";
}
  [(set_attr "type" "vecperm")])
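
;; For illustration: inserting element 2 of a V4SI on big endian gives a
;; byte offset of 4 * 2 = 8, so the pattern above emits
;;	xxinsertw %x0,%x2,8
;; (on little endian the element number is mirrored first, so element 2
;; becomes element 1 and the offset is 4).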

(define_insn_and_split "vsx_set_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "gpc_reg_operand" "ww")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 5)
	(unspec:V4SF [(match_dup 2)]
		     UNSPEC_VSX_CVDPSPN))
   (parallel [(set (match_dup 4)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 7)])))
	      (clobber (scratch:SI))])
   (set (match_dup 8)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  unsigned int tmp_regno = reg_or_subregno (operands[4]);

  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
  operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "12")])

;; Special case setting 0.0f to a V4SF element
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "zero_fp_constant" "j")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 4)
	(const_int 0))
   (set (match_dup 5)
	(unspec:V4SI [(match_dup 5)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  We just need to do an xxinsertw since the element is in the
;; correct location.

(define_insn "*vsx_insert_extract_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
{
  int ele = INTVAL (operands[4]);

  if (!VECTOR_ELT_ORDER_BIG)
    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;

  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
  return "xxinsertw %x0,%x2,%4";
}
  [(set_attr "type" "vecperm")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  Convert the insert/extract to int and avoid doing the conversion.

(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 5 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
   && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 5)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 3)])))
	      (clobber (scratch:SI))])
   (set (match_dup 7)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 5)
		      (match_dup 4)]
		     UNSPEC_VSX_SET))]
{
  if (GET_CODE (operands[5]) == SCRATCH)
    operands[5] = gen_reg_rtx (SImode);

  operands[6] = gen_lowpart (V4SImode, operands[2]);
  operands[7] = gen_lowpart (V4SImode, operands[0]);
  operands[8] = gen_lowpart (V4SImode, operands[1]);
}
  [(set_attr "type" "vecperm")])

;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})
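
;; For illustration: with inputs A = {a0, a1} and B = {b0, b1},
;; vec_mergeh gives {a0, b0} and vec_mergel gives {a1, b1} (in big
;; endian element numbering); the expanders above encode that as a
;; vec_select of the concatenated pair, with the operands and selectors
;; mirrored for the little endian cases.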

;; V2DF/V2DI splat
;; We separate the register splat insn from the memory splat insn to force the
;; register allocator to generate the indexed form of the SPLAT when it is
;; given an offsettable memory reference.  Otherwise, if the register and
;; memory insns were combined into a single insn, the register allocator will
;; load the value into a register, and then do a double word permute.
(define_expand "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "input_operand")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx op1 = operands[1];
  if (MEM_P (op1))
    operands[1] = rs6000_address_for_fpconvert (op1);
  else if (!REG_P (op1))
    operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
})

(define_insn "vsx_splat_<mode>_reg"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   mtvsrdd %x0,%1,%1"
  [(set_attr "type" "vecperm")])

(define_insn "vsx_splat_<VSX_D:mode>_mem"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
	(vec_duplicate:VSX_D
	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvdsx %x0,%y1"
  [(set_attr "type" "vecload")])

;; V4SI splat support
(define_insn "vsx_splat_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
	(vec_duplicate:V4SI
	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
  "TARGET_P9_VECTOR"
  "@
   mtvsrws %x0,%1
   lxvwsx %x0,%y1"
  [(set_attr "type" "vecperm,vecload")])

;; SImode is not currently allowed in vector registers.  This pattern
;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW
(define_insn "vsx_splat_v4si_di"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:V4SI
	 (truncate:SI
	  (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "@
   xxspltw %x0,%x1,1
   mtvsrws %x0,%1"
  [(set_attr "type" "vecperm")])

;; V4SF splat (ISA 3.0)
(define_insn_and_split "vsx_splat_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
	(vec_duplicate:V4SF
	 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
  "TARGET_P9_VECTOR"
  "@
   lxvwsx %x0,%y1
   #
   mtvsrws %x0,%1"
  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
  [(set (match_dup 0)
	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
   (set (match_dup 0)
	(unspec:V4SF [(match_dup 0)
		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
  ""
  [(set_attr "type" "vecload,vecperm,mftgpr")
   (set_attr "length" "4,8,4")])
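
;; For illustration: splatting an SF value that is already in a VSX
;; register (the second alternative above) splits into
;;	xscvdpspn vs0,vs1	; convert the scalar to a single-prec word
;;	xxspltw vs0,vs0,0	; replicate that word into all four lanes
;; while a memory or GPR source is handled by a single lxvwsx/mtvsrws.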

;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_duplicate:VSX_W
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	  (parallel
	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])

;; V16QI/V8HI splat support on ISA 2.07
(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
	(vec_duplicate:VSX_SPLAT_I
	 (truncate:<VS_scalar>
	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
      || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	  (vec_concat:<VS_double>
	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
	  (parallel [(const_int 0) (const_int 4)
		     (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	  (vec_concat:<VS_double>
	    (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	    (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
	  (parallel [(const_int 2) (const_int 6)
		     (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
		       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")])

\f
;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
	(VEC_reduc:V2DF
	 (vec_concat:V2DF
	  (vec_select:DF
	   (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	   (parallel [(const_int 1)]))
	  (vec_select:DF
	   (match_dup 1)
	   (parallel [(const_int 0)])))
	 (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
	     ? gen_reg_rtx (V2DFmode)
	     : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
	(VEC_reduc:V4SF
	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])
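
;; Worked example (illustrative) of the V4SF reduction above, for an
;; add reduction of v = {a, b, c, d}:
;;	xxsldwi t2,v,v,2	; t2 = {c, d, a, b}
;;	xvaddsp t3,t2,v		; t3 = {a+c, b+d, a+c, b+d}
;;	xxsldwi t4,t3,t3,3	; rotate by one word
;;	xvaddsp r,t4,t3		; every lane now holds a+b+c+d
;; The scalar combiner patterns below then pick the result out of the
;; appropriate lane without a separate extract.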

;; Combiner patterns paired with the vector reduction patterns that know we
;; can get to the top element of the V2DF array without doing an extract.

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
	(vec_select:DF
	 (VEC_reduc:V2DF
	  (vec_concat:V2DF
	   (vec_select:DF
	    (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	    (parallel [(const_int 1)]))
	   (vec_select:DF
	    (match_dup 1)
	    (parallel [(const_int 0)])))
	  (match_dup 1))
	 (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
	    ? gen_reg_rtx (DFmode)
	    : operands[2];

  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
	(vec_select:SF
	 (VEC_reduc:V4SF
	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	  (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
	 (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 4 "=0,0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])

\f
;; Power8 Vector fusion.  The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_dup 0)
			   (match_operand:P 3 "int_reg_operand"))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
			   (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])
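
;; For illustration (register numbers hypothetical): the peepholes above
;; keep a sequence such as
;;	li 10,32
;;	lxvd2x 32,10,9
;; physically adjacent, so power8 fusion can pair the immediate load
;; with the vector load.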
50c78b9a
MM
4481
4482\f
4483;; ISA 3.0 vector extend sign support
4484
4485(define_insn "vsx_sign_extend_qi_<mode>"
4486 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4487 (unspec:VSINT_84
4488 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4489 UNSPEC_VSX_SIGN_EXTEND))]
4490 "TARGET_P9_VECTOR"
4491 "vextsb2<wd> %0,%1"
7c788ce2 4492 [(set_attr "type" "vecexts")])
50c78b9a 4493
ac11b8c0 4494(define_insn "vsx_sign_extend_hi_<mode>"
50c78b9a
MM
4495 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4496 (unspec:VSINT_84
4497 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4498 UNSPEC_VSX_SIGN_EXTEND))]
4499 "TARGET_P9_VECTOR"
4500 "vextsh2<wd> %0,%1"
7c788ce2 4501 [(set_attr "type" "vecexts")])
50c78b9a
MM
4502
4503(define_insn "*vsx_sign_extend_si_v2di"
4504 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4505 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4506 UNSPEC_VSX_SIGN_EXTEND))]
4507 "TARGET_P9_VECTOR"
4508 "vextsw2d %0,%1"
7c788ce2 4509 [(set_attr "type" "vecexts")])
ac11b8c0
MM
4510
4511\f
e9e6d4f6
KN
4512;; ISA 3.0 Binary Floating-Point Support
4513
b70bb05b 4514;; VSX Scalar Extract Exponent Quad-Precision
cdb4b7aa 4515(define_insn "xsxexpqp_<mode>"
b70bb05b 4516 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
cdb4b7aa 4517 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
b70bb05b
KN
4518 UNSPEC_VSX_SXEXPDP))]
4519 "TARGET_P9_VECTOR"
4520 "xsxexpqp %0,%1"
4521 [(set_attr "type" "vecmove")])
4522
e9e6d4f6
KN
4523;; VSX Scalar Extract Exponent Double-Precision
4524(define_insn "xsxexpdp"
4525 [(set (match_operand:DI 0 "register_operand" "=r")
4526 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
4527 UNSPEC_VSX_SXEXPDP))]
4528 "TARGET_P9_VECTOR && TARGET_64BIT"
4529 "xsxexpdp %0,%x1"
4530 [(set_attr "type" "integer")])
4531
b70bb05b 4532;; VSX Scalar Extract Significand Quad-Precision
cdb4b7aa 4533(define_insn "xsxsigqp_<mode>"
b70bb05b 4534 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
cdb4b7aa 4535 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
b70bb05b
KN
4536 UNSPEC_VSX_SXSIG))]
4537 "TARGET_P9_VECTOR"
4538 "xsxsigqp %0,%1"
4539 [(set_attr "type" "vecmove")])
4540
e9e6d4f6
KN
4541;; VSX Scalar Extract Significand Double-Precision
4542(define_insn "xsxsigdp"
4543 [(set (match_operand:DI 0 "register_operand" "=r")
4544 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
b70bb05b 4545 UNSPEC_VSX_SXSIG))]
e9e6d4f6
KN
4546 "TARGET_P9_VECTOR && TARGET_64BIT"
4547 "xsxsigdp %0,%x1"
4548 [(set_attr "type" "integer")])
4549
b70bb05b 4550;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
cdb4b7aa
MM
4551(define_insn "xsiexpqpf_<mode>"
4552 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4553 (unspec:IEEE128
4554 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
4555 (match_operand:DI 2 "altivec_register_operand" "v")]
b70bb05b
KN
4556 UNSPEC_VSX_SIEXPQP))]
4557 "TARGET_P9_VECTOR"
4558 "xsiexpqp %0,%1,%2"
4559 [(set_attr "type" "vecmove")])
4560
4561;; VSX Scalar Insert Exponent Quad-Precision
cdb4b7aa
MM
4562(define_insn "xsiexpqp_<mode>"
4563 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
4564 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
4565 (match_operand:DI 2 "altivec_register_operand" "v")]
b70bb05b
KN
4566 UNSPEC_VSX_SIEXPQP))]
4567 "TARGET_P9_VECTOR"
4568 "xsiexpqp %0,%1,%2"
4569 [(set_attr "type" "vecmove")])
4570
e9e6d4f6
KN
4571;; VSX Scalar Insert Exponent Double-Precision
4572(define_insn "xsiexpdp"
4573 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4574 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
4575 (match_operand:DI 2 "register_operand" "r")]
4576 UNSPEC_VSX_SIEXPDP))]
4577 "TARGET_P9_VECTOR && TARGET_64BIT"
4578 "xsiexpdp %x0,%1,%2"
4579 [(set_attr "type" "fpsimple")])
4580
28826a66
KN
4581;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
4582(define_insn "xsiexpdpf"
4583 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
4584 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
4585 (match_operand:DI 2 "register_operand" "r")]
4586 UNSPEC_VSX_SIEXPDP))]
4587 "TARGET_P9_VECTOR && TARGET_64BIT"
4588 "xsiexpdp %x0,%1,%2"
4589 [(set_attr "type" "fpsimple")])

;; VSX Scalar Compare Exponents Double-Precision
(define_expand "xscmpexpdp_<code>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:DF
	  [(match_operand:DF 1 "vsx_register_operand" "wa")
	   (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpdp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
		     (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpdp %0,%x1,%x2"
  [(set_attr "type" "fpcompare")])
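
;; The CMP_TEST iterator (eq, lt, gt, unordered) generates one expander per
;; exponent-compare built-in.  Illustrative C (assuming the documented
;; scalar_cmp_exp_gt form and -mcpu=power9):
;;
;;   #include <altivec.h>
;;
;;   int
;;   exp_gt (double a, double b)
;;   {
;;     /* Compares only the biased exponents of a and b via xscmpexpdp.  */
;;     return scalar_cmp_exp_gt (a, b);
;;   }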

;; VSX Scalar Test Data Class Quad-Precision
;; (Expansion for scalar_test_data_class (__ieee128, int))
;; (Has side effect of setting the lt bit if operand 1 is negative,
;; setting the eq bit if any of the conditions tested by operand 2
;; are satisfied, and clearing the gt and unordered bits to zero.)
(define_expand "xststdcqp_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; VSX Scalar Test Data Class Double- and Single-Precision
;; (The lt bit is set if operand 1 is negative.  The eq bit is set
;; if any of the conditions tested by operand 2 are satisfied.
;; The gt and unordered bits are cleared to zero.)
(define_expand "xststdc<Fvsx>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 4)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
  operands[4] = CONST0_RTX (SImode);
})
4660
b70bb05b 4661;; The VSX Scalar Test Negative Quad-Precision
cdb4b7aa 4662(define_expand "xststdcnegqp_<mode>"
b70bb05b
KN
4663 [(set (match_dup 2)
4664 (compare:CCFP
cdb4b7aa
MM
4665 (unspec:IEEE128
4666 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
b70bb05b
KN
4667 (const_int 0)]
4668 UNSPEC_VSX_STSTDC)
4669 (const_int 0)))
4670 (set (match_operand:SI 0 "register_operand" "=r")
4671 (lt:SI (match_dup 2)
4672 (const_int 0)))]
4673 "TARGET_P9_VECTOR"
4674{
4675 operands[2] = gen_reg_rtx (CCFPmode);
4676})
4677
4678;; The VSX Scalar Test Negative Double- and Single-Precision
e9e6d4f6
KN
4679(define_expand "xststdcneg<Fvsx>"
4680 [(set (match_dup 2)
4681 (compare:CCFP
4682 (unspec:SFDF
4683 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
4684 (const_int 0)]
4685 UNSPEC_VSX_STSTDC)
4686 (match_dup 3)))
4687 (set (match_operand:SI 0 "register_operand" "=r")
4688 (lt:SI (match_dup 2)
4689 (const_int 0)))]
4690 "TARGET_P9_VECTOR"
4691{
4692 operands[2] = gen_reg_rtx (CCFPmode);
4693 operands[3] = CONST0_RTX (SImode);
4694})

(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))]
  "TARGET_P9_VECTOR"
  "xststdcqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

(define_insn "*xststdc<Fvsx>"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xststdc<Fvsx> %0,%x1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Vector Extract Exponent Double and Single Precision
(define_insn "xvxexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXEXP))]
  "TARGET_P9_VECTOR"
  "xvxexp<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Extract Significand Double and Single Precision
(define_insn "xvxsig<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXSIG))]
  "TARGET_P9_VECTOR"
  "xvxsig<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Insert Exponent Double and Single Precision
(define_insn "xviexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VIEXP))]
  "TARGET_P9_VECTOR"
  "xviexp<VSs> %x0,%x1,%x2"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Test Data Class Double and Single Precision
;; The corresponding elements of the result vector are all ones
;; if any of the conditions tested by operand 2 are satisfied.
(define_insn "xvtstdc<VSs>"
  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
	(unspec:<VSI>
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
	 UNSPEC_VSX_VTSTDC))]
  "TARGET_P9_VECTOR"
  "xvtstdc<VSs> %x0,%x1,%2"
  [(set_attr "type" "vecsimple")])
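
;; Vector analogue in C (assuming the documented vec_test_data_class
;; built-in; the mask semantics follow the scalar DCMX encoding, so 0x70
;; is again an assumed NaN/infinity selector):
;;
;;   #include <altivec.h>
;;
;;   vector bool int
;;   lanes_nan_or_inf (vector float v)
;;   {
;;     /* Each result lane is all ones when that lane is NaN or +/-Inf.  */
;;     return vec_test_data_class (v, 0x70);
;;   }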

;; ISA 3.0 String Operations Support

;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
;; v4si modes; there is no need to match v4sf, v2df, or v2di modes because
;; those are expanded to use Power8 instructions.
(define_insn "*vsx_ne_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
	(ne:VSX_EXTRACT_I (match_dup 1)
			  (match_dup 2)))]
  "TARGET_P9_VECTOR"
  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "*vector_nez_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC [(unspec:VI
		     [(match_operand:VI 1 "gpc_reg_operand" "v")
		      (match_operand:VI 2 "gpc_reg_operand" "v")]
		     UNSPEC_NEZ_P)]
	 UNSPEC_PREDICATE))
   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
	(unspec:VI [(match_dup 1)
		    (match_dup 2)]
	 UNSPEC_NEZ_P))]
  "TARGET_P9_VECTOR"
  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Return first position of match between vectors
(define_expand "first_match_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;

  rtx cmp_result = gen_reg_rtx (<MODE>mode);
  rtx not_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));

  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of match between vectors or end of string (EOS)
(define_expand "first_match_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that do not match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements that match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors
(define_expand "first_mismatch_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					    operands[2]));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors or end of string (EOS)
(define_expand "first_mismatch_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements did not match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])
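
;; lxvl expects the byte count in the top byte of the length register,
;; hence the shift left by 56 in the expander.  Illustrative C (assuming
;; the documented vec_xl_len built-in on a 64-bit Power9 target):
;;
;;   #include <altivec.h>
;;   #include <stddef.h>
;;
;;   vector unsigned char
;;   load_prefix (unsigned char *p, size_t n)
;;   {
;;     /* Loads only n bytes (n <= 16); remaining bytes are zeroed.  */
;;     return vec_xl_len (p, n);
;;   }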

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
					  shift_mask));
  DONE;
})

(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length
(define_expand "stxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])
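
;; Store counterpart of vec_xl_len.  Illustrative C (assuming the
;; documented vec_xst_len built-in on a 64-bit Power9 target):
;;
;;   #include <altivec.h>
;;   #include <stddef.h>
;;
;;   void
;;   store_prefix (vector unsigned char v, unsigned char *p, size_t n)
;;   {
;;     /* Stores only the first n bytes (n <= 16) of v to p.  */
;;     vec_xst_len (v, p, n);
;;   }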

;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
   (match_operand:DI 1 "register_operand" "b")
   (match_operand:DI 2 "register_operand" "r")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})

;; Vector Compare Not Equal Byte (specified using not+eq: rather than an
;; unspec)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(not:V16QI
	 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
		   (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Half Word (specified using not+eq: rather than
;; an unspec)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(not:V8HI
	 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
		  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified using not+eq: rather than an
;; unspec)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(not:V4SI
	 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
		  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])
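
;; Illustrative C for the compare patterns (assuming the documented
;; vec_cmpne/vec_cmpnez built-ins):
;;
;;   #include <altivec.h>
;;
;;   vector bool char
;;   ne_or_eos (vector signed char a, vector signed char b)
;;   {
;;     /* Lane is all ones where a != b or where either input lane is
;;        zero (the end-of-string case).  */
;;     return vec_cmpnez (a, b);
;;   }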

;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")]
	 UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])
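
;; Illustrative C (assuming the documented vec_cntlz_lsbb built-in):
;;
;;   #include <altivec.h>
;;
;;   int
;;   leading_lsb_zeros (vector unsigned char v)
;;   {
;;     /* Counts leading bytes whose least-significant bit is zero.  */
;;     return vec_cntlz_lsbb (v);
;;   }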

;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])
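
;; Illustrative C for the left-indexed extracts (an assumption: the
;; vec_xlx built-in documented for ISA 3.0, where the index is a byte
;; offset from the left end of the vector):
;;
;;   #include <altivec.h>
;;
;;   unsigned int
;;   word_at (unsigned int byte_index, vector unsigned int v)
;;   {
;;     return vec_xlx (byte_index, v);
;;   }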

;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; insert4b.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand")
	(unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
		      (match_operand:QI 2 "const_0_to_12_operand" "n")]
	 UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!VECTOR_ELT_ORDER_BIG)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
		       (match_operand:V16QI 2 "vsx_register_operand")
		       (match_operand:QI 3 "const_0_to_12_operand")]
	 UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!VECTOR_ELT_ORDER_BIG)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
		       (match_operand:V16QI 2 "vsx_register_operand" "0")
		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
	 UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])
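
;; A simplified C model of the 4-byte extraction step, using big-endian
;; byte numbering (illustrative only, not a built-in; the real xxextractuw
;; places the word at a fixed position inside a 128-bit VSR):
;;
;;   #include <string.h>
;;
;;   static unsigned int
;;   extract4b_model (const unsigned char src[16], int n)
;;   {
;;     unsigned int w;		/* n must be in 0..12  */
;;     memcpy (&w, src + n, 4);
;;     return w;
;;   }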

;; Extract four float (F32) values from the left four elements of an
;; eight-element vector of float16 (F16) values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
	 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int i;

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    rvals[i] = GEN_INT (vals[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 0,1,2,3 for the conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Extract four float (F32) values from the right four elements of an
;; eight-element vector of float16 (F16) values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
	 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int i;
  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    rvals[i] = GEN_INT (vals[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 4,5,6,7 for the conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* Want to have the elements in reverse order relative
	 to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])


;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	  0)	;; GPR temporary
   (SFBOOL_TMP_VSX	  1)	;; vector temporary
   (SFBOOL_MFVSR_D	  2)	;; move to gpr dest
   (SFBOOL_MFVSR_A	  3)	;; move to gpr src
   (SFBOOL_BOOL_D	  4)	;; and/ior/xor dest
   (SFBOOL_BOOL_A1	  5)	;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	  6)	;; and/ior/xor arg2
   (SFBOOL_SHL_D	  7)	;; shift left dest
   (SFBOOL_SHL_A	  8)	;; shift left arg
   (SFBOOL_MTVSR_D	  9)	;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	 10)	;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	 11)	;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	 12)	;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	 13)])	;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})