]>
Commit | Line | Data |
---|---|---|
29e6733c | 1 | ;; VSX patterns. |
99dee823 | 2 | ;; Copyright (C) 2009-2021 Free Software Foundation, Inc. |
29e6733c MM |
3 | ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com> |
4 | ||
5 | ;; This file is part of GCC. | |
6 | ||
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published | |
9 | ;; by the Free Software Foundation; either version 3, or (at your | |
10 | ;; option) any later version. | |
11 | ||
12 | ;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | ;; License for more details. | |
16 | ||
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
e9e6d4f6 KN |
21 | ;; Iterator for comparison types |
22 | (define_code_iterator CMP_TEST [eq lt gt unordered]) | |
23 | ||
394a527f CL |
24 | ;; Mode attribute for vector floate and floato conversions |
25 | (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")]) | |
26 | ||
29e6733c MM |
27 | ;; Iterator for both scalar and vector floating point types supported by VSX |
28 | (define_mode_iterator VSX_B [DF V4SF V2DF]) | |
29 | ||
30 | ;; Iterator for the 2 64-bit vector types | |
31 | (define_mode_iterator VSX_D [V2DF V2DI]) | |
32 | ||
c477a667 MM |
33 | ;; Mode iterator to handle swapping words on little endian for the 128-bit |
34 | ;; types that goes in a single vector register. | |
35 | (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)") | |
9393bc31 | 36 | (TF "FLOAT128_VECTOR_P (TFmode)") |
4a89b7e7 | 37 | TI |
6579b156 | 38 | V1TI]) |
c477a667 | 39 | |
29e6733c MM |
40 | ;; Iterator for the 2 32-bit vector types |
41 | (define_mode_iterator VSX_W [V4SF V4SI]) | |
42 | ||
688e4919 MM |
43 | ;; Iterator for the DF types |
44 | (define_mode_iterator VSX_DF [V2DF DF]) | |
45 | ||
29e6733c MM |
46 | ;; Iterator for vector floating point types supported by VSX |
47 | (define_mode_iterator VSX_F [V4SF V2DF]) | |
48 | ||
49 | ;; Iterator for logical types supported by VSX | |
c477a667 MM |
50 | (define_mode_iterator VSX_L [V16QI |
51 | V8HI | |
52 | V4SI | |
53 | V2DI | |
54 | V4SF | |
55 | V2DF | |
56 | V1TI | |
57 | TI | |
58 | (KF "FLOAT128_VECTOR_P (KFmode)") | |
711c065c | 59 | (TF "FLOAT128_VECTOR_P (TFmode)")]) |
29e6733c | 60 | |
50c78b9a | 61 | ;; Iterator for memory moves. |
c477a667 MM |
62 | (define_mode_iterator VSX_M [V16QI |
63 | V8HI | |
64 | V4SI | |
65 | V2DI | |
66 | V4SF | |
67 | V2DF | |
68 | V1TI | |
69 | (KF "FLOAT128_VECTOR_P (KFmode)") | |
50c78b9a | 70 | (TF "FLOAT128_VECTOR_P (TFmode)") |
4a89b7e7 | 71 | TI]) |
d86e633a | 72 | |
fc504349 CL |
73 | (define_mode_attr VSX_XXBR [(V8HI "h") |
74 | (V4SI "w") | |
75 | (V4SF "w") | |
76 | (V2DF "d") | |
77 | (V2DI "d") | |
78 | (V1TI "q")]) | |
79 | ||
29e6733c MM |
80 | ;; Map into the appropriate load/store name based on the type |
81 | (define_mode_attr VSm [(V16QI "vw4") | |
82 | (V8HI "vw4") | |
83 | (V4SI "vw4") | |
84 | (V4SF "vw4") | |
85 | (V2DF "vd2") | |
86 | (V2DI "vd2") | |
87 | (DF "d") | |
c477a667 MM |
88 | (TF "vd2") |
89 | (KF "vd2") | |
a16a872d | 90 | (V1TI "vd2") |
c6d5ff83 | 91 | (TI "vd2")]) |
29e6733c | 92 | |
29e6733c MM |
93 | ;; Map the register class used |
94 | (define_mode_attr VSr [(V16QI "v") | |
95 | (V8HI "v") | |
96 | (V4SI "v") | |
8d3620ba | 97 | (V4SF "wa") |
85949949 SB |
98 | (V2DI "wa") |
99 | (V2DF "wa") | |
e670418f | 100 | (DI "wa") |
cc998fd5 | 101 | (DF "wa") |
72e3386e | 102 | (SF "wa") |
cb152d12 SB |
103 | (TF "wa") |
104 | (KF "wa") | |
a16a872d | 105 | (V1TI "v") |
e670418f | 106 | (TI "wa")]) |
29e6733c | 107 | |
cb152d12 SB |
108 | ;; What value we need in the "isa" field, to make the IEEE QP float work. |
109 | (define_mode_attr VSisa [(V16QI "*") | |
110 | (V8HI "*") | |
111 | (V4SI "*") | |
112 | (V4SF "*") | |
113 | (V2DI "*") | |
114 | (V2DF "*") | |
115 | (DI "*") | |
116 | (DF "*") | |
117 | (SF "*") | |
118 | (V1TI "*") | |
119 | (TI "*") | |
120 | (TF "p9tf") | |
121 | (KF "p9kf")]) | |
59f5868d | 122 | |
c3217088 PB |
123 | ;; A mode attribute to disparage use of GPR registers, except for scalar |
124 | ;; integer modes. | |
125 | (define_mode_attr ??r [(V16QI "??r") | |
126 | (V8HI "??r") | |
127 | (V4SI "??r") | |
128 | (V4SF "??r") | |
129 | (V2DI "??r") | |
130 | (V2DF "??r") | |
131 | (V1TI "??r") | |
132 | (KF "??r") | |
133 | (TF "??r") | |
134 | (TI "r")]) | |
135 | ||
00fd0628 PB |
136 | ;; A mode attribute used for 128-bit constant values. |
137 | (define_mode_attr nW [(V16QI "W") | |
138 | (V8HI "W") | |
139 | (V4SI "W") | |
140 | (V4SF "W") | |
141 | (V2DI "W") | |
142 | (V2DF "W") | |
143 | (V1TI "W") | |
144 | (KF "W") | |
145 | (TF "W") | |
146 | (TI "n")]) | |
147 | ||
29e6733c MM |
148 | ;; Same size integer type for floating point data |
149 | (define_mode_attr VSi [(V4SF "v4si") | |
150 | (V2DF "v2di") | |
151 | (DF "di")]) | |
152 | ||
153 | (define_mode_attr VSI [(V4SF "V4SI") | |
154 | (V2DF "V2DI") | |
155 | (DF "DI")]) | |
156 | ||
157 | ;; Word size for same size conversion | |
158 | (define_mode_attr VSc [(V4SF "w") | |
159 | (V2DF "d") | |
160 | (DF "d")]) | |
161 | ||
29e6733c MM |
162 | ;; Map into either s or v, depending on whether this is a scalar or vector |
163 | ;; operation | |
164 | (define_mode_attr VSv [(V16QI "v") | |
165 | (V8HI "v") | |
166 | (V4SI "v") | |
167 | (V4SF "v") | |
168 | (V2DI "v") | |
169 | (V2DF "v") | |
a16a872d | 170 | (V1TI "v") |
c477a667 MM |
171 | (DF "s") |
172 | (KF "v")]) | |
29e6733c MM |
173 | |
174 | ;; Appropriate type for add ops (and other simple FP ops) | |
4356b75d | 175 | (define_mode_attr VStype_simple [(V2DF "vecdouble") |
29e6733c MM |
176 | (V4SF "vecfloat") |
177 | (DF "fp")]) | |
178 | ||
29e6733c | 179 | ;; Appropriate type for multiply ops |
4356b75d | 180 | (define_mode_attr VStype_mul [(V2DF "vecdouble") |
29e6733c MM |
181 | (V4SF "vecfloat") |
182 | (DF "dmul")]) | |
183 | ||
4356b75d PH |
184 | ;; Appropriate type for divide ops. |
185 | (define_mode_attr VStype_div [(V2DF "vecdiv") | |
186 | (V4SF "vecfdiv") | |
29e6733c MM |
187 | (DF "ddiv")]) |
188 | ||
29e6733c | 189 | ;; Map the scalar mode for a vector type |
a16a872d MM |
190 | (define_mode_attr VS_scalar [(V1TI "TI") |
191 | (V2DF "DF") | |
29e6733c MM |
192 | (V2DI "DI") |
193 | (V4SF "SF") | |
194 | (V4SI "SI") | |
195 | (V8HI "HI") | |
196 | (V16QI "QI")]) | |
5aebfdad RH |
197 | |
198 | ;; Map to a double-sized vector mode | |
199 | (define_mode_attr VS_double [(V4SI "V8SI") | |
200 | (V4SF "V8SF") | |
201 | (V2DI "V4DI") | |
a16a872d MM |
202 | (V2DF "V4DF") |
203 | (V1TI "V2TI")]) | |
5aebfdad | 204 | |
50c78b9a | 205 | ;; Iterators for loading constants with xxspltib |
787c7a65 | 206 | (define_mode_iterator VSINT_84 [V4SI V2DI DI SI]) |
50c78b9a MM |
207 | (define_mode_iterator VSINT_842 [V8HI V4SI V2DI]) |
208 | ||
fc504349 CL |
209 | ;; Vector reverse byte modes |
210 | (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI]) | |
211 | ||
787c7a65 MM |
212 | ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors. |
213 | ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be | |
214 | ;; done on ISA 2.07 and not just ISA 3.0. | |
215 | (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI]) | |
216 | (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI]) | |
b8eaa754 | 217 | (define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI]) |
c5e74d9d | 218 | |
902cb7b1 KN |
219 | (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b") |
220 | (V8HI "h") | |
221 | (V4SI "w")]) | |
222 | ||
c5e74d9d MM |
223 | ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and |
224 | ;; insert to validate the operand number. | |
225 | (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand") | |
226 | (V8HI "const_0_to_7_operand") | |
227 | (V4SI "const_0_to_3_operand")]) | |
228 | ||
229 | ;; Mode attribute to give the constraint for vector extract and insert | |
230 | ;; operations. | |
231 | (define_mode_attr VSX_EX [(V16QI "v") | |
232 | (V8HI "v") | |
233 | (V4SI "wa")]) | |
234 | ||
156b5cca MM |
235 | ;; Mode iterator for binary floating types other than double to |
236 | ;; optimize convert to that floating point type from an extract | |
237 | ;; of an integer type | |
238 | (define_mode_iterator VSX_EXTRACT_FL [SF | |
239 | (IF "FLOAT128_2REG_P (IFmode)") | |
240 | (KF "TARGET_FLOAT128_HW") | |
241 | (TF "FLOAT128_2REG_P (TFmode) | |
242 | || (FLOAT128_IEEE_P (TFmode) | |
243 | && TARGET_FLOAT128_HW)")]) | |
244 | ||
16370e79 MM |
245 | ;; Mode iterator for binary floating types that have a direct conversion |
246 | ;; from 64-bit integer to floating point | |
247 | (define_mode_iterator FL_CONV [SF | |
248 | DF | |
249 | (KF "TARGET_FLOAT128_HW") | |
250 | (TF "TARGET_FLOAT128_HW | |
251 | && FLOAT128_IEEE_P (TFmode)")]) | |
252 | ||
6019c0fc MM |
253 | ;; Iterator for the 2 short vector types to do a splat from an integer |
254 | (define_mode_iterator VSX_SPLAT_I [V16QI V8HI]) | |
255 | ||
256 | ;; Mode attribute to give the count for the splat instruction to splat | |
257 | ;; the value in the 64-bit integer slot | |
258 | (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")]) | |
259 | ||
260 | ;; Mode attribute to give the suffix for the splat instruction | |
261 | (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")]) | |
262 | ||
02ef74ba CL |
263 | ;; Iterator for the move to mask instructions |
264 | (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI]) | |
265 | (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI]) | |
266 | ||
f1ad419e CL |
267 | ;; Longer vec int modes for rotate/mask ops |
268 | ;; and Vector Integer Multiply/Divide/Modulo Instructions | |
269 | (define_mode_iterator VIlong [V2DI V4SI]) | |
270 | ||
29e6733c | 271 | ;; Constants for creating unspecs |
f3c33d9d MM |
272 | (define_c_enum "unspec" |
273 | [UNSPEC_VSX_CONCAT | |
274 | UNSPEC_VSX_CVDPSXWS | |
275 | UNSPEC_VSX_CVDPUXWS | |
276 | UNSPEC_VSX_CVSPDP | |
26bca0ed | 277 | UNSPEC_VSX_CVHPSP |
0bd62dca MM |
278 | UNSPEC_VSX_CVSPDPN |
279 | UNSPEC_VSX_CVDPSPN | |
f3c33d9d MM |
280 | UNSPEC_VSX_CVSXWDP |
281 | UNSPEC_VSX_CVUXWDP | |
282 | UNSPEC_VSX_CVSXDSP | |
283 | UNSPEC_VSX_CVUXDSP | |
be1418c7 CL |
284 | UNSPEC_VSX_FLOAT2 |
285 | UNSPEC_VSX_UNS_FLOAT2 | |
286 | UNSPEC_VSX_FLOATE | |
287 | UNSPEC_VSX_UNS_FLOATE | |
288 | UNSPEC_VSX_FLOATO | |
289 | UNSPEC_VSX_UNS_FLOATO | |
f3c33d9d MM |
290 | UNSPEC_VSX_TDIV |
291 | UNSPEC_VSX_TSQRT | |
f3c33d9d MM |
292 | UNSPEC_VSX_SET |
293 | UNSPEC_VSX_ROUND_I | |
294 | UNSPEC_VSX_ROUND_IC | |
295 | UNSPEC_VSX_SLDWI | |
26bca0ed CL |
296 | UNSPEC_VSX_XXPERM |
297 | ||
bf53d4b8 | 298 | UNSPEC_VSX_XXSPLTW |
2ccdda19 BS |
299 | UNSPEC_VSX_XXSPLTD |
300 | UNSPEC_VSX_DIVSD | |
301 | UNSPEC_VSX_DIVUD | |
f03122f2 CL |
302 | UNSPEC_VSX_DIVSQ |
303 | UNSPEC_VSX_DIVUQ | |
304 | UNSPEC_VSX_DIVESQ | |
305 | UNSPEC_VSX_DIVEUQ | |
306 | UNSPEC_VSX_MODSQ | |
307 | UNSPEC_VSX_MODUQ | |
2ccdda19 | 308 | UNSPEC_VSX_MULSD |
50c78b9a | 309 | UNSPEC_VSX_SIGN_EXTEND |
94bedeaf | 310 | UNSPEC_VSX_XVCVBF16SPN |
8ee2640b | 311 | UNSPEC_VSX_XVCVSPBF16 |
e5898daf | 312 | UNSPEC_VSX_XVCVSPSXDS |
58b475a2 | 313 | UNSPEC_VSX_XVCVSPHP |
e0d32185 MM |
314 | UNSPEC_VSX_VSLO |
315 | UNSPEC_VSX_EXTRACT | |
e9e6d4f6 | 316 | UNSPEC_VSX_SXEXPDP |
b70bb05b | 317 | UNSPEC_VSX_SXSIG |
e9e6d4f6 | 318 | UNSPEC_VSX_SIEXPDP |
b70bb05b | 319 | UNSPEC_VSX_SIEXPQP |
e9e6d4f6 | 320 | UNSPEC_VSX_SCMPEXPDP |
fc756f9f | 321 | UNSPEC_VSX_SCMPEXPQP |
e9e6d4f6 | 322 | UNSPEC_VSX_STSTDC |
26bca0ed CL |
323 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH |
324 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL | |
e9e6d4f6 KN |
325 | UNSPEC_VSX_VXEXP |
326 | UNSPEC_VSX_VXSIG | |
327 | UNSPEC_VSX_VIEXP | |
328 | UNSPEC_VSX_VTSTDC | |
e5898daf | 329 | UNSPEC_VSX_VSIGNED2 |
1262c6cf | 330 | |
902cb7b1 | 331 | UNSPEC_LXVL |
1262c6cf CL |
332 | UNSPEC_LXVLL |
333 | UNSPEC_LVSL_REG | |
334 | UNSPEC_LVSR_REG | |
902cb7b1 | 335 | UNSPEC_STXVL |
1262c6cf CL |
336 | UNSPEC_STXVLL |
337 | UNSPEC_XL_LEN_R | |
338 | UNSPEC_XST_LEN_R | |
339 | ||
902cb7b1 KN |
340 | UNSPEC_VCLZLSBB |
341 | UNSPEC_VCTZLSBB | |
342 | UNSPEC_VEXTUBLX | |
343 | UNSPEC_VEXTUHLX | |
344 | UNSPEC_VEXTUWLX | |
345 | UNSPEC_VEXTUBRX | |
346 | UNSPEC_VEXTUHRX | |
347 | UNSPEC_VEXTUWRX | |
348 | UNSPEC_VCMPNEB | |
349 | UNSPEC_VCMPNEZB | |
350 | UNSPEC_VCMPNEH | |
351 | UNSPEC_VCMPNEZH | |
352 | UNSPEC_VCMPNEW | |
353 | UNSPEC_VCMPNEZW | |
16370e79 MM |
354 | UNSPEC_XXEXTRACTUW |
355 | UNSPEC_XXINSERTW | |
4d85d480 CL |
356 | UNSPEC_VSX_FIRST_MATCH_INDEX |
357 | UNSPEC_VSX_FIRST_MATCH_EOS_INDEX | |
358 | UNSPEC_VSX_FIRST_MISMATCH_INDEX | |
359 | UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX | |
b8eaa754 | 360 | UNSPEC_XXGENPCV |
02ef74ba | 361 | UNSPEC_MTVSBM |
87325119 WS |
362 | UNSPEC_EXTENDDITI2 |
363 | UNSPEC_MTVSRD_DITI_W1 | |
02ef74ba CL |
364 | UNSPEC_VCNTMB |
365 | UNSPEC_VEXPAND | |
366 | UNSPEC_VEXTRACT | |
30d02149 CL |
367 | UNSPEC_EXTRACTL |
368 | UNSPEC_EXTRACTR | |
530e9095 CL |
369 | UNSPEC_INSERTL |
370 | UNSPEC_INSERTR | |
3f029aea CL |
371 | UNSPEC_REPLACE_ELT |
372 | UNSPEC_REPLACE_UN | |
f1ad419e CL |
373 | UNSPEC_VDIVES |
374 | UNSPEC_VDIVEU | |
d2883be3 MM |
375 | UNSPEC_XXEVAL |
376 | UNSPEC_XXSPLTIW | |
5973dac9 | 377 | UNSPEC_XXSPLTIDP |
d2883be3 MM |
378 | UNSPEC_XXSPLTI32DX |
379 | UNSPEC_XXBLEND | |
380 | UNSPEC_XXPERMX | |
f3c33d9d | 381 | ]) |
29e6733c | 382 | |
8ee2640b | 383 | (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16 |
94bedeaf | 384 | UNSPEC_VSX_XVCVBF16SPN]) |
8ee2640b PB |
385 | |
386 | (define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16") | |
94bedeaf | 387 | (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")]) |
8ee2640b | 388 | |
30d02149 CL |
389 | ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops |
390 | (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) | |
391 | ||
3f029aea CL |
392 | ;; Vector extract_elt iterator/attr for 32-bit and 64-bit elements |
393 | (define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF]) | |
394 | (define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w") | |
395 | (V2DI "d") (V2DF "d")]) | |
396 | (define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2") | |
397 | (V2DI "3") (V2DF "3")]) | |
398 | (define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12") | |
399 | (V2DI "8") (V2DF "8")]) | |
400 | ||
d2883be3 MM |
401 | ;; Like VM2 in altivec.md, just do char, short, int, long, float and double |
402 | (define_mode_iterator VM3 [V4SI | |
403 | V8HI | |
404 | V16QI | |
405 | V4SF | |
406 | V2DF | |
407 | V2DI]) | |
408 | ||
409 | (define_mode_attr VM3_char [(V2DI "d") | |
410 | (V4SI "w") | |
411 | (V8HI "h") | |
412 | (V16QI "b") | |
413 | (V2DF "d") | |
414 | (V4SF "w")]) | |
415 | ||
416 | ||
29e6733c | 417 | ;; VSX moves |
0cf68694 BS |
418 | |
419 | ;; The patterns for LE permuted loads and stores come before the general | |
420 | ;; VSX moves so they match first. | |
6e8b7d9c | 421 | (define_insn_and_split "*vsx_le_perm_load_<mode>" |
012f609e | 422 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
2025a48d | 423 | (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 424 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 425 | "#" |
a3a821c9 | 426 | "&& 1" |
0cf68694 | 427 | [(set (match_dup 2) |
6e8b7d9c | 428 | (vec_select:<MODE> |
0cf68694 BS |
429 | (match_dup 1) |
430 | (parallel [(const_int 1) (const_int 0)]))) | |
431 | (set (match_dup 0) | |
6e8b7d9c | 432 | (vec_select:<MODE> |
0cf68694 BS |
433 | (match_dup 2) |
434 | (parallel [(const_int 1) (const_int 0)])))] | |
0cf68694 | 435 | { |
a3a821c9 KN |
436 | rtx mem = operands[1]; |
437 | ||
438 | /* Don't apply the swap optimization if we've already performed register | |
439 | allocation and the hard register destination is not in the altivec | |
440 | range. */ | |
441 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 442 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0])) |
a3a821c9 KN |
443 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[0])))) |
444 | { | |
445 | rtx mem_address = XEXP (mem, 0); | |
446 | enum machine_mode mode = GET_MODE (mem); | |
447 | ||
448 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
449 | { | |
450 | /* Replace the source memory address with masked address. */ | |
451 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
452 | emit_insn (lvx_set_expr); | |
453 | DONE; | |
454 | } | |
455 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
456 | { | |
457 | /* This rtl is already in the form that matches lvx | |
458 | instruction, so leave it alone. */ | |
459 | DONE; | |
460 | } | |
461 | /* Otherwise, fall through to transform into a swapping load. */ | |
462 | } | |
0cf68694 BS |
463 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
464 | : operands[0]; | |
465 | } | |
0cf68694 BS |
466 | [(set_attr "type" "vecload") |
467 | (set_attr "length" "8")]) | |
468 | ||
6e8b7d9c | 469 | (define_insn_and_split "*vsx_le_perm_load_<mode>" |
7858932e | 470 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
2025a48d | 471 | (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 472 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 473 | "#" |
a3a821c9 | 474 | "&& 1" |
0cf68694 | 475 | [(set (match_dup 2) |
6e8b7d9c | 476 | (vec_select:<MODE> |
0cf68694 BS |
477 | (match_dup 1) |
478 | (parallel [(const_int 2) (const_int 3) | |
479 | (const_int 0) (const_int 1)]))) | |
480 | (set (match_dup 0) | |
6e8b7d9c | 481 | (vec_select:<MODE> |
0cf68694 BS |
482 | (match_dup 2) |
483 | (parallel [(const_int 2) (const_int 3) | |
484 | (const_int 0) (const_int 1)])))] | |
0cf68694 | 485 | { |
a3a821c9 KN |
486 | rtx mem = operands[1]; |
487 | ||
488 | /* Don't apply the swap optimization if we've already performed register | |
489 | allocation and the hard register destination is not in the altivec | |
490 | range. */ | |
491 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 492 | && (!HARD_REGISTER_P (operands[0]) |
a3a821c9 KN |
493 | || ALTIVEC_REGNO_P (REGNO(operands[0])))) |
494 | { | |
495 | rtx mem_address = XEXP (mem, 0); | |
496 | enum machine_mode mode = GET_MODE (mem); | |
497 | ||
498 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
499 | { | |
500 | /* Replace the source memory address with masked address. */ | |
501 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
502 | emit_insn (lvx_set_expr); | |
503 | DONE; | |
504 | } | |
505 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
506 | { | |
507 | /* This rtl is already in the form that matches lvx | |
508 | instruction, so leave it alone. */ | |
509 | DONE; | |
510 | } | |
511 | /* Otherwise, fall through to transform into a swapping load. */ | |
512 | } | |
0cf68694 BS |
513 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
514 | : operands[0]; | |
515 | } | |
0cf68694 BS |
516 | [(set_attr "type" "vecload") |
517 | (set_attr "length" "8")]) | |
518 | ||
519 | (define_insn_and_split "*vsx_le_perm_load_v8hi" | |
520 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
2025a48d | 521 | (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 522 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 523 | "#" |
a3a821c9 | 524 | "&& 1" |
0cf68694 BS |
525 | [(set (match_dup 2) |
526 | (vec_select:V8HI | |
527 | (match_dup 1) | |
528 | (parallel [(const_int 4) (const_int 5) | |
529 | (const_int 6) (const_int 7) | |
530 | (const_int 0) (const_int 1) | |
531 | (const_int 2) (const_int 3)]))) | |
532 | (set (match_dup 0) | |
533 | (vec_select:V8HI | |
534 | (match_dup 2) | |
535 | (parallel [(const_int 4) (const_int 5) | |
536 | (const_int 6) (const_int 7) | |
537 | (const_int 0) (const_int 1) | |
538 | (const_int 2) (const_int 3)])))] | |
0cf68694 | 539 | { |
a3a821c9 KN |
540 | rtx mem = operands[1]; |
541 | ||
542 | /* Don't apply the swap optimization if we've already performed register | |
543 | allocation and the hard register destination is not in the altivec | |
544 | range. */ | |
545 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 546 | && (!HARD_REGISTER_P (operands[0]) |
a3a821c9 KN |
547 | || ALTIVEC_REGNO_P (REGNO(operands[0])))) |
548 | { | |
549 | rtx mem_address = XEXP (mem, 0); | |
550 | enum machine_mode mode = GET_MODE (mem); | |
551 | ||
552 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
553 | { | |
554 | /* Replace the source memory address with masked address. */ | |
555 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
556 | emit_insn (lvx_set_expr); | |
557 | DONE; | |
558 | } | |
559 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
560 | { | |
561 | /* This rtl is already in the form that matches lvx | |
562 | instruction, so leave it alone. */ | |
563 | DONE; | |
564 | } | |
565 | /* Otherwise, fall through to transform into a swapping load. */ | |
566 | } | |
0cf68694 BS |
567 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
568 | : operands[0]; | |
569 | } | |
0cf68694 BS |
570 | [(set_attr "type" "vecload") |
571 | (set_attr "length" "8")]) | |
572 | ||
573 | (define_insn_and_split "*vsx_le_perm_load_v16qi" | |
574 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
2025a48d | 575 | (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 576 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 577 | "#" |
a3a821c9 | 578 | "&& 1" |
0cf68694 BS |
579 | [(set (match_dup 2) |
580 | (vec_select:V16QI | |
581 | (match_dup 1) | |
582 | (parallel [(const_int 8) (const_int 9) | |
583 | (const_int 10) (const_int 11) | |
584 | (const_int 12) (const_int 13) | |
585 | (const_int 14) (const_int 15) | |
586 | (const_int 0) (const_int 1) | |
587 | (const_int 2) (const_int 3) | |
588 | (const_int 4) (const_int 5) | |
589 | (const_int 6) (const_int 7)]))) | |
590 | (set (match_dup 0) | |
591 | (vec_select:V16QI | |
592 | (match_dup 2) | |
593 | (parallel [(const_int 8) (const_int 9) | |
594 | (const_int 10) (const_int 11) | |
595 | (const_int 12) (const_int 13) | |
596 | (const_int 14) (const_int 15) | |
597 | (const_int 0) (const_int 1) | |
598 | (const_int 2) (const_int 3) | |
599 | (const_int 4) (const_int 5) | |
600 | (const_int 6) (const_int 7)])))] | |
0cf68694 | 601 | { |
a3a821c9 KN |
602 | rtx mem = operands[1]; |
603 | ||
604 | /* Don't apply the swap optimization if we've already performed register | |
605 | allocation and the hard register destination is not in the altivec | |
606 | range. */ | |
607 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 608 | && (!HARD_REGISTER_P (operands[0]) |
a3a821c9 KN |
609 | || ALTIVEC_REGNO_P (REGNO(operands[0])))) |
610 | { | |
611 | rtx mem_address = XEXP (mem, 0); | |
612 | enum machine_mode mode = GET_MODE (mem); | |
613 | ||
614 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
615 | { | |
616 | /* Replace the source memory address with masked address. */ | |
617 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
618 | emit_insn (lvx_set_expr); | |
619 | DONE; | |
620 | } | |
621 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
622 | { | |
623 | /* This rtl is already in the form that matches lvx | |
624 | instruction, so leave it alone. */ | |
625 | DONE; | |
626 | } | |
627 | /* Otherwise, fall through to transform into a swapping load. */ | |
628 | } | |
0cf68694 BS |
629 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
630 | : operands[0]; | |
631 | } | |
0cf68694 BS |
632 | [(set_attr "type" "vecload") |
633 | (set_attr "length" "8")]) | |
634 | ||
411f1755 | 635 | (define_insn "*vsx_le_perm_store_<mode>" |
2025a48d | 636 | [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z") |
012f609e | 637 | (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 638 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 639 | "#" |
411f1755 BS |
640 | [(set_attr "type" "vecstore") |
641 | (set_attr "length" "12")]) | |
642 | ||
643 | (define_split | |
ad18eed2 SB |
644 | [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand") |
645 | (match_operand:VSX_D 1 "vsx_register_operand"))] | |
5d57fdc1 | 646 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 | 647 | [(set (match_dup 2) |
6e8b7d9c | 648 | (vec_select:<MODE> |
0cf68694 BS |
649 | (match_dup 1) |
650 | (parallel [(const_int 1) (const_int 0)]))) | |
651 | (set (match_dup 0) | |
6e8b7d9c | 652 | (vec_select:<MODE> |
0cf68694 BS |
653 | (match_dup 2) |
654 | (parallel [(const_int 1) (const_int 0)])))] | |
0cf68694 | 655 | { |
a3a821c9 KN |
656 | rtx mem = operands[0]; |
657 | ||
658 | /* Don't apply the swap optimization if we've already performed register | |
659 | allocation and the hard register source is not in the altivec range. */ | |
660 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
661 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
662 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
663 | { |
664 | rtx mem_address = XEXP (mem, 0); | |
665 | enum machine_mode mode = GET_MODE (mem); | |
666 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
667 | { | |
668 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
669 | emit_insn (stvx_set_expr); | |
670 | DONE; | |
671 | } | |
672 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
673 | { | |
674 | /* This rtl is already in the form that matches stvx instruction, | |
675 | so leave it alone. */ | |
676 | DONE; | |
677 | } | |
678 | /* Otherwise, fall through to transform into a swapping store. */ | |
679 | } | |
680 | ||
0cf68694 BS |
681 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
682 | : operands[1]; | |
411f1755 BS |
683 | }) |
684 | ||
685 | ;; The post-reload split requires that we re-permute the source | |
686 | ;; register in case it is still live. | |
687 | (define_split | |
ad18eed2 SB |
688 | [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand") |
689 | (match_operand:VSX_D 1 "vsx_register_operand"))] | |
5d57fdc1 | 690 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
691 | [(set (match_dup 1) |
692 | (vec_select:<MODE> | |
693 | (match_dup 1) | |
694 | (parallel [(const_int 1) (const_int 0)]))) | |
695 | (set (match_dup 0) | |
696 | (vec_select:<MODE> | |
697 | (match_dup 1) | |
698 | (parallel [(const_int 1) (const_int 0)]))) | |
699 | (set (match_dup 1) | |
700 | (vec_select:<MODE> | |
701 | (match_dup 1) | |
702 | (parallel [(const_int 1) (const_int 0)])))] | |
703 | "") | |
0cf68694 | 704 | |
411f1755 | 705 | (define_insn "*vsx_le_perm_store_<mode>" |
2025a48d | 706 | [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z") |
7858932e | 707 | (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 708 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 709 | "#" |
411f1755 BS |
710 | [(set_attr "type" "vecstore") |
711 | (set_attr "length" "12")]) | |
712 | ||
713 | (define_split | |
ad18eed2 SB |
714 | [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand") |
715 | (match_operand:VSX_W 1 "vsx_register_operand"))] | |
5d57fdc1 | 716 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 | 717 | [(set (match_dup 2) |
6e8b7d9c | 718 | (vec_select:<MODE> |
0cf68694 BS |
719 | (match_dup 1) |
720 | (parallel [(const_int 2) (const_int 3) | |
721 | (const_int 0) (const_int 1)]))) | |
722 | (set (match_dup 0) | |
6e8b7d9c | 723 | (vec_select:<MODE> |
0cf68694 BS |
724 | (match_dup 2) |
725 | (parallel [(const_int 2) (const_int 3) | |
726 | (const_int 0) (const_int 1)])))] | |
0cf68694 | 727 | { |
a3a821c9 KN |
728 | rtx mem = operands[0]; |
729 | ||
730 | /* Don't apply the swap optimization if we've already performed register | |
731 | allocation and the hard register source is not in the altivec range. */ | |
732 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
733 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
734 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
735 | { |
736 | rtx mem_address = XEXP (mem, 0); | |
737 | enum machine_mode mode = GET_MODE (mem); | |
738 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
739 | { | |
740 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
741 | emit_insn (stvx_set_expr); | |
742 | DONE; | |
743 | } | |
744 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
745 | { | |
746 | /* This rtl is already in the form that matches stvx instruction, | |
747 | so leave it alone. */ | |
748 | DONE; | |
749 | } | |
750 | /* Otherwise, fall through to transform into a swapping store. */ | |
751 | } | |
752 | ||
0cf68694 BS |
753 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
754 | : operands[1]; | |
411f1755 BS |
755 | }) |
756 | ||
757 | ;; The post-reload split requires that we re-permute the source | |
758 | ;; register in case it is still live. | |
759 | (define_split | |
ad18eed2 SB |
760 | [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand") |
761 | (match_operand:VSX_W 1 "vsx_register_operand"))] | |
5d57fdc1 | 762 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
763 | [(set (match_dup 1) |
764 | (vec_select:<MODE> | |
765 | (match_dup 1) | |
766 | (parallel [(const_int 2) (const_int 3) | |
767 | (const_int 0) (const_int 1)]))) | |
768 | (set (match_dup 0) | |
769 | (vec_select:<MODE> | |
770 | (match_dup 1) | |
771 | (parallel [(const_int 2) (const_int 3) | |
772 | (const_int 0) (const_int 1)]))) | |
773 | (set (match_dup 1) | |
774 | (vec_select:<MODE> | |
775 | (match_dup 1) | |
776 | (parallel [(const_int 2) (const_int 3) | |
777 | (const_int 0) (const_int 1)])))] | |
778 | "") | |
0cf68694 | 779 | |
411f1755 | 780 | (define_insn "*vsx_le_perm_store_v8hi" |
2025a48d | 781 | [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z") |
0cf68694 | 782 | (match_operand:V8HI 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 783 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 784 | "#" |
411f1755 BS |
785 | [(set_attr "type" "vecstore") |
786 | (set_attr "length" "12")]) | |
787 | ||
788 | (define_split | |
ad18eed2 SB |
789 | [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") |
790 | (match_operand:V8HI 1 "vsx_register_operand"))] | |
5d57fdc1 | 791 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 BS |
792 | [(set (match_dup 2) |
793 | (vec_select:V8HI | |
794 | (match_dup 1) | |
795 | (parallel [(const_int 4) (const_int 5) | |
796 | (const_int 6) (const_int 7) | |
797 | (const_int 0) (const_int 1) | |
798 | (const_int 2) (const_int 3)]))) | |
799 | (set (match_dup 0) | |
800 | (vec_select:V8HI | |
801 | (match_dup 2) | |
802 | (parallel [(const_int 4) (const_int 5) | |
803 | (const_int 6) (const_int 7) | |
804 | (const_int 0) (const_int 1) | |
805 | (const_int 2) (const_int 3)])))] | |
0cf68694 | 806 | { |
a3a821c9 KN |
807 | rtx mem = operands[0]; |
808 | ||
809 | /* Don't apply the swap optimization if we've already performed register | |
810 | allocation and the hard register source is not in the altivec range. */ | |
811 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
812 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
813 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
814 | { |
815 | rtx mem_address = XEXP (mem, 0); | |
816 | enum machine_mode mode = GET_MODE (mem); | |
817 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
818 | { | |
819 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
820 | emit_insn (stvx_set_expr); | |
821 | DONE; | |
822 | } | |
823 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
824 | { | |
825 | /* This rtl is already in the form that matches stvx instruction, | |
826 | so leave it alone. */ | |
827 | DONE; | |
828 | } | |
829 | /* Otherwise, fall through to transform into a swapping store. */ | |
830 | } | |
831 | ||
0cf68694 BS |
832 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
833 | : operands[1]; | |
411f1755 BS |
834 | }) |
835 | ||
836 | ;; The post-reload split requires that we re-permute the source | |
837 | ;; register in case it is still live. | |
838 | (define_split | |
ad18eed2 SB |
839 | [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") |
840 | (match_operand:V8HI 1 "vsx_register_operand"))] | |
5d57fdc1 | 841 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
842 | [(set (match_dup 1) |
843 | (vec_select:V8HI | |
844 | (match_dup 1) | |
845 | (parallel [(const_int 4) (const_int 5) | |
846 | (const_int 6) (const_int 7) | |
847 | (const_int 0) (const_int 1) | |
848 | (const_int 2) (const_int 3)]))) | |
849 | (set (match_dup 0) | |
850 | (vec_select:V8HI | |
851 | (match_dup 1) | |
852 | (parallel [(const_int 4) (const_int 5) | |
853 | (const_int 6) (const_int 7) | |
854 | (const_int 0) (const_int 1) | |
855 | (const_int 2) (const_int 3)]))) | |
856 | (set (match_dup 1) | |
857 | (vec_select:V8HI | |
858 | (match_dup 1) | |
859 | (parallel [(const_int 4) (const_int 5) | |
860 | (const_int 6) (const_int 7) | |
861 | (const_int 0) (const_int 1) | |
862 | (const_int 2) (const_int 3)])))] | |
863 | "") | |
0cf68694 | 864 | |
411f1755 | 865 | (define_insn "*vsx_le_perm_store_v16qi" |
2025a48d | 866 | [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z") |
0cf68694 | 867 | (match_operand:V16QI 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 868 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 869 | "#" |
411f1755 BS |
870 | [(set_attr "type" "vecstore") |
871 | (set_attr "length" "12")]) | |
872 | ||
873 | (define_split | |
ad18eed2 SB |
874 | [(set (match_operand:V16QI 0 "indexed_or_indirect_operand") |
875 | (match_operand:V16QI 1 "vsx_register_operand"))] | |
5d57fdc1 | 876 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 BS |
877 | [(set (match_dup 2) |
878 | (vec_select:V16QI | |
879 | (match_dup 1) | |
880 | (parallel [(const_int 8) (const_int 9) | |
881 | (const_int 10) (const_int 11) | |
882 | (const_int 12) (const_int 13) | |
883 | (const_int 14) (const_int 15) | |
884 | (const_int 0) (const_int 1) | |
885 | (const_int 2) (const_int 3) | |
886 | (const_int 4) (const_int 5) | |
887 | (const_int 6) (const_int 7)]))) | |
888 | (set (match_dup 0) | |
889 | (vec_select:V16QI | |
890 | (match_dup 2) | |
891 | (parallel [(const_int 8) (const_int 9) | |
892 | (const_int 10) (const_int 11) | |
893 | (const_int 12) (const_int 13) | |
894 | (const_int 14) (const_int 15) | |
895 | (const_int 0) (const_int 1) | |
896 | (const_int 2) (const_int 3) | |
897 | (const_int 4) (const_int 5) | |
898 | (const_int 6) (const_int 7)])))] | |
0cf68694 | 899 | { |
a3a821c9 KN |
900 | rtx mem = operands[0]; |
901 | ||
902 | /* Don't apply the swap optimization if we've already performed register | |
903 | allocation and the hard register source is not in the altivec range. */ | |
904 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
905 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
906 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
907 | { |
908 | rtx mem_address = XEXP (mem, 0); | |
909 | enum machine_mode mode = GET_MODE (mem); | |
910 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
911 | { | |
912 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
913 | emit_insn (stvx_set_expr); | |
914 | DONE; | |
915 | } | |
916 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
917 | { | |
918 | /* This rtl is already in the form that matches stvx instruction, | |
919 | so leave it alone. */ | |
920 | DONE; | |
921 | } | |
922 | /* Otherwise, fall through to transform into a swapping store. */ | |
923 | } | |
924 | ||
0cf68694 BS |
925 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
926 | : operands[1]; | |
411f1755 BS |
927 | }) |
928 | ||
929 | ;; The post-reload split requires that we re-permute the source | |
930 | ;; register in case it is still live. | |
931 | (define_split | |
ad18eed2 SB |
932 | [(set (match_operand:V16QI 0 "indexed_or_indirect_operand") |
933 | (match_operand:V16QI 1 "vsx_register_operand"))] | |
5d57fdc1 | 934 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
935 | [(set (match_dup 1) |
936 | (vec_select:V16QI | |
937 | (match_dup 1) | |
938 | (parallel [(const_int 8) (const_int 9) | |
939 | (const_int 10) (const_int 11) | |
940 | (const_int 12) (const_int 13) | |
941 | (const_int 14) (const_int 15) | |
942 | (const_int 0) (const_int 1) | |
943 | (const_int 2) (const_int 3) | |
944 | (const_int 4) (const_int 5) | |
945 | (const_int 6) (const_int 7)]))) | |
946 | (set (match_dup 0) | |
947 | (vec_select:V16QI | |
948 | (match_dup 1) | |
949 | (parallel [(const_int 8) (const_int 9) | |
950 | (const_int 10) (const_int 11) | |
951 | (const_int 12) (const_int 13) | |
952 | (const_int 14) (const_int 15) | |
953 | (const_int 0) (const_int 1) | |
954 | (const_int 2) (const_int 3) | |
955 | (const_int 4) (const_int 5) | |
956 | (const_int 6) (const_int 7)]))) | |
957 | (set (match_dup 1) | |
958 | (vec_select:V16QI | |
959 | (match_dup 1) | |
960 | (parallel [(const_int 8) (const_int 9) | |
961 | (const_int 10) (const_int 11) | |
962 | (const_int 12) (const_int 13) | |
963 | (const_int 14) (const_int 15) | |
964 | (const_int 0) (const_int 1) | |
965 | (const_int 2) (const_int 3) | |
966 | (const_int 4) (const_int 5) | |
967 | (const_int 6) (const_int 7)])))] | |
968 | "") | |
0cf68694 | 969 | |
c477a667 MM |
970 | ;; Little endian word swapping for 128-bit types that are either scalars or the |
971 | ;; special V1TI container class, which it is not appropriate to use vec_select | |
972 | ;; for the type. | |
973 | (define_insn "*vsx_le_permute_<mode>" | |
f1701864 CL |
974 | [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q") |
975 | (rotate:VEC_TI | |
976 | (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r") | |
c477a667 | 977 | (const_int 64)))] |
32928931 | 978 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
c477a667 MM |
979 | "@ |
980 | xxpermdi %x0,%x1,%x1,2 | |
981 | lxvd2x %x0,%y1 | |
d00fdf85 PB |
982 | stxvd2x %x1,%y0 |
983 | mr %0,%L1\;mr %L0,%1 | |
984 | ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1 | |
985 | std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0" | |
911c8df0 | 986 | [(set_attr "length" "*,*,*,8,8,8") |
d00fdf85 | 987 | (set_attr "type" "vecperm,vecload,vecstore,*,load,store")]) |
c477a667 MM |
988 | |
989 | (define_insn_and_split "*vsx_le_undo_permute_<mode>" | |
f1701864 CL |
990 | [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa") |
991 | (rotate:VEC_TI | |
992 | (rotate:VEC_TI | |
993 | (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa") | |
c477a667 MM |
994 | (const_int 64)) |
995 | (const_int 64)))] | |
996 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
997 | "@ | |
998 | # | |
999 | xxlor %x0,%x1" | |
0ec7641e | 1000 | "&& 1" |
c477a667 MM |
1001 | [(set (match_dup 0) (match_dup 1))] |
1002 | { | |
1003 | if (reload_completed && REGNO (operands[0]) == REGNO (operands[1])) | |
1004 | { | |
1005 | emit_note (NOTE_INSN_DELETED); | |
1006 | DONE; | |
1007 | } | |
1008 | } | |
1009 | [(set_attr "length" "0,4") | |
7c788ce2 | 1010 | (set_attr "type" "veclogical")]) |
c477a667 MM |
1011 | |
1012 | (define_insn_and_split "*vsx_le_perm_load_<mode>" | |
cb152d12 | 1013 | [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r") |
d00fdf85 | 1014 | (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))] |
cb25dea3 PB |
1015 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
1016 | && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)" | |
d00fdf85 PB |
1017 | "@ |
1018 | # | |
1019 | #" | |
cb25dea3 PB |
1020 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
1021 | && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)" | |
02d3ba0e | 1022 | [(const_int 0)] |
c477a667 | 1023 | { |
02d3ba0e RS |
1024 | rtx tmp = (can_create_pseudo_p () |
1025 | ? gen_reg_rtx_and_attrs (operands[0]) | |
1026 | : operands[0]); | |
1027 | rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode); | |
1028 | rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode); | |
1029 | DONE; | |
c477a667 | 1030 | } |
d00fdf85 | 1031 | [(set_attr "type" "vecload,load") |
cb152d12 SB |
1032 | (set_attr "length" "8,8") |
1033 | (set_attr "isa" "<VSisa>,*")]) | |
c477a667 MM |
1034 | |
1035 | (define_insn "*vsx_le_perm_store_<mode>" | |
d00fdf85 | 1036 | [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q") |
cb152d12 | 1037 | (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))] |
cb25dea3 | 1038 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
3a7794b4 | 1039 | && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" |
d00fdf85 PB |
1040 | "@ |
1041 | # | |
1042 | #" | |
1043 | [(set_attr "type" "vecstore,store") | |
cb152d12 SB |
1044 | (set_attr "length" "12,8") |
1045 | (set_attr "isa" "<VSisa>,*")]) | |
c477a667 MM |
1046 | |
1047 | (define_split | |
ad18eed2 SB |
1048 | [(set (match_operand:VSX_LE_128 0 "memory_operand") |
1049 | (match_operand:VSX_LE_128 1 "vsx_register_operand"))] | |
cb25dea3 PB |
1050 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR |
1051 | && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" | |
02d3ba0e | 1052 | [(const_int 0)] |
c477a667 | 1053 | { |
02d3ba0e RS |
1054 | rtx tmp = (can_create_pseudo_p () |
1055 | ? gen_reg_rtx_and_attrs (operands[0]) | |
1056 | : operands[0]); | |
1057 | rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode); | |
1058 | rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode); | |
1059 | DONE; | |
c477a667 MM |
1060 | }) |
1061 | ||
d00fdf85 PB |
1062 | ;; Peepholes to catch loads and stores for TImode if TImode landed in |
1063 | ;; GPR registers on a little endian system. | |
1064 | (define_peephole2 | |
f1701864 CL |
1065 | [(set (match_operand:VEC_TI 0 "int_reg_operand") |
1066 | (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand") | |
d00fdf85 | 1067 | (const_int 64))) |
f1701864 CL |
1068 | (set (match_operand:VEC_TI 2 "int_reg_operand") |
1069 | (rotate:VEC_TI (match_dup 0) | |
d00fdf85 | 1070 | (const_int 64)))] |
4a89b7e7 | 1071 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
d00fdf85 PB |
1072 | && (rtx_equal_p (operands[0], operands[2]) |
1073 | || peep2_reg_dead_p (2, operands[0]))" | |
1074 | [(set (match_dup 2) (match_dup 1))]) | |
1075 | ||
1076 | (define_peephole2 | |
f1701864 CL |
1077 | [(set (match_operand:VEC_TI 0 "int_reg_operand") |
1078 | (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand") | |
d00fdf85 | 1079 | (const_int 64))) |
f1701864 CL |
1080 | (set (match_operand:VEC_TI 2 "memory_operand") |
1081 | (rotate:VEC_TI (match_dup 0) | |
d00fdf85 | 1082 | (const_int 64)))] |
4a89b7e7 | 1083 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
d00fdf85 PB |
1084 | && peep2_reg_dead_p (2, operands[0])" |
1085 | [(set (match_dup 2) (match_dup 1))]) | |
1086 | ||
9393bc31 MM |
1087 | ;; Peephole to catch memory to memory transfers for TImode if TImode landed in |
1088 | ;; VSX registers on a little endian system. The vector types and IEEE 128-bit | |
1089 | ;; floating point are handled by the more generic swap elimination pass. | |
1090 | (define_peephole2 | |
ad18eed2 SB |
1091 | [(set (match_operand:TI 0 "vsx_register_operand") |
1092 | (rotate:TI (match_operand:TI 1 "vsx_register_operand") | |
9393bc31 | 1093 | (const_int 64))) |
ad18eed2 | 1094 | (set (match_operand:TI 2 "vsx_register_operand") |
9393bc31 MM |
1095 | (rotate:TI (match_dup 0) |
1096 | (const_int 64)))] | |
4a89b7e7 | 1097 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
9393bc31 MM |
1098 | && (rtx_equal_p (operands[0], operands[2]) |
1099 | || peep2_reg_dead_p (2, operands[0]))" | |
1100 | [(set (match_dup 2) (match_dup 1))]) | |
1101 | ||
c477a667 MM |
1102 | ;; The post-reload split requires that we re-permute the source |
1103 | ;; register in case it is still live. | |
1104 | (define_split | |
ad18eed2 SB |
1105 | [(set (match_operand:VSX_LE_128 0 "memory_operand") |
1106 | (match_operand:VSX_LE_128 1 "vsx_register_operand"))] | |
cb25dea3 PB |
1107 | "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR |
1108 | && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" | |
02d3ba0e RS |
1109 | [(const_int 0)] |
1110 | { | |
1111 | rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode); | |
1112 | rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode); | |
1113 | rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode); | |
1114 | DONE; | |
1115 | }) | |
0cf68694 | 1116 | |
50c78b9a MM |
1117 | ;; Vector constants that can be generated with XXSPLTIB that was added in ISA |
1118 | ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized. | |
1119 | (define_insn "xxspltib_v16qi" | |
1120 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
1121 | (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))] | |
1122 | "TARGET_P9_VECTOR" | |
29e6733c | 1123 | { |
50c78b9a MM |
1124 | operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff); |
1125 | return "xxspltib %x0,%2"; | |
29e6733c | 1126 | } |
50c78b9a MM |
1127 | [(set_attr "type" "vecperm")]) |
1128 | ||
1129 | (define_insn "xxspltib_<mode>_nosplit" | |
58f2fb5c MM |
1130 | [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa") |
1131 | (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))] | |
50c78b9a | 1132 | "TARGET_P9_VECTOR" |
29e6733c | 1133 | { |
50c78b9a MM |
1134 | rtx op1 = operands[1]; |
1135 | int value = 256; | |
1136 | int num_insns = -1; | |
1137 | ||
1138 | if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value) | |
1139 | || num_insns != 1) | |
1140 | gcc_unreachable (); | |
1141 | ||
1142 | operands[2] = GEN_INT (value & 0xff); | |
1143 | return "xxspltib %x0,%2"; | |
c6d5ff83 | 1144 | } |
50c78b9a MM |
1145 | [(set_attr "type" "vecperm")]) |
1146 | ||
1147 | (define_insn_and_split "*xxspltib_<mode>_split" | |
1148 | [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v") | |
1149 | (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))] | |
1150 | "TARGET_P9_VECTOR" | |
1151 | "#" | |
1152 | "&& 1" | |
1153 | [(const_int 0)] | |
c6d5ff83 | 1154 | { |
50c78b9a MM |
1155 | int value = 256; |
1156 | int num_insns = -1; | |
1157 | rtx op0 = operands[0]; | |
1158 | rtx op1 = operands[1]; | |
1159 | rtx tmp = ((can_create_pseudo_p ()) | |
1160 | ? gen_reg_rtx (V16QImode) | |
1161 | : gen_lowpart (V16QImode, op0)); | |
c6d5ff83 | 1162 | |
50c78b9a MM |
1163 | if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value) |
1164 | || num_insns != 2) | |
1165 | gcc_unreachable (); | |
c6d5ff83 | 1166 | |
50c78b9a | 1167 | emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value))); |
c6d5ff83 | 1168 | |
50c78b9a MM |
1169 | if (<MODE>mode == V2DImode) |
1170 | emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp)); | |
c6d5ff83 | 1171 | |
50c78b9a MM |
1172 | else if (<MODE>mode == V4SImode) |
1173 | emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp)); | |
1174 | ||
1175 | else if (<MODE>mode == V8HImode) | |
1176 | emit_insn (gen_altivec_vupkhsb (op0, tmp)); | |
1177 | ||
1178 | else | |
1179 | gcc_unreachable (); | |
29e6733c | 1180 | |
50c78b9a MM |
1181 | DONE; |
1182 | } | |
1183 | [(set_attr "type" "vecperm") | |
1184 | (set_attr "length" "8")]) | |
29e6733c | 1185 | |
29e6733c | 1186 | |
50c78b9a MM |
1187 | ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB |
1188 | ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or | |
1189 | ;; all 1's, since the machine does not have to wait for the previous | |
1190 | ;; instruction using the register being set (such as a store waiting on a slow | |
1191 | ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move. | |
c6d5ff83 | 1192 | |
50c78b9a | 1193 | ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) |
05a7a104 | 1194 | ;; XXSPLTIDP LXVKQ |
50c78b9a | 1195 | ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW |
00fd0628 | 1196 | ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) |
f7e94dfb | 1197 | (define_insn "vsx_mov<mode>_64bit" |
50c78b9a | 1198 | [(set (match_operand:VSX_M 0 "nonimmediate_operand" |
cb152d12 | 1199 | "=ZwO, wa, wa, r, we, ?wQ, |
05a7a104 | 1200 | wa, wa, |
afc69d4e | 1201 | ?&r, ??r, ??Y, <??r>, wa, v, |
cb152d12 | 1202 | ?wa, v, <??r>, wZ, v") |
c6d5ff83 | 1203 | |
50c78b9a | 1204 | (match_operand:VSX_M 1 "input_operand" |
cb152d12 | 1205 | "wa, ZwO, wa, we, r, r, |
05a7a104 | 1206 | eF, eQ, |
50c78b9a | 1207 | wQ, Y, r, r, wE, jwM, |
00fd0628 | 1208 | ?jwM, W, <nW>, v, wZ"))] |
c6d5ff83 | 1209 | |
50c78b9a MM |
1210 | "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) |
1211 | && (register_operand (operands[0], <MODE>mode) | |
1212 | || register_operand (operands[1], <MODE>mode))" | |
1213 | { | |
1214 | return rs6000_output_move_128bit (operands); | |
1215 | } | |
1216 | [(set_attr "type" | |
863e8d53 | 1217 | "vecstore, vecload, vecsimple, mtvsr, mfvsr, load, |
05a7a104 | 1218 | vecperm, vecperm, |
50c78b9a | 1219 | store, load, store, *, vecsimple, vecsimple, |
00fd0628 | 1220 | vecsimple, *, *, vecstore, vecload") |
ca06b86c MM |
1221 | (set_attr "num_insns" |
1222 | "*, *, *, 2, *, 2, | |
05a7a104 | 1223 | *, *, |
ca06b86c MM |
1224 | 2, 2, 2, 2, *, *, |
1225 | *, 5, 2, *, *") | |
1226 | (set_attr "max_prefixed_insns" | |
1227 | "*, *, *, *, *, 2, | |
05a7a104 | 1228 | *, *, |
ca06b86c MM |
1229 | 2, 2, 2, 2, *, *, |
1230 | *, *, *, *, *") | |
50c78b9a | 1231 | (set_attr "length" |
911c8df0 | 1232 | "*, *, *, 8, *, 8, |
05a7a104 | 1233 | *, *, |
911c8df0 MM |
1234 | 8, 8, 8, 8, *, *, |
1235 | *, 20, 8, *, *") | |
afc69d4e | 1236 | (set_attr "isa" |
cb152d12 | 1237 | "<VSisa>, <VSisa>, <VSisa>, *, *, *, |
05a7a104 | 1238 | p10, p10, |
afc69d4e | 1239 | *, *, *, *, p9v, *, |
cb152d12 | 1240 | <VSisa>, *, *, *, *")]) |
50c78b9a MM |
1241 | |
1242 | ;; VSX store VSX load VSX move GPR load GPR store GPR move | |
05a7a104 | 1243 | ;; XXSPLTIDP LXVKQ |
00fd0628 | 1244 | ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const |
50c78b9a MM |
1245 | ;; LVX (VMX) STVX (VMX) |
1246 | (define_insn "*vsx_mov<mode>_32bit" | |
1247 | [(set (match_operand:VSX_M 0 "nonimmediate_operand" | |
cb152d12 | 1248 | "=ZwO, wa, wa, ??r, ??Y, <??r>, |
05a7a104 | 1249 | wa, wa, |
cb152d12 | 1250 | wa, v, ?wa, v, <??r>, |
50c78b9a MM |
1251 | wZ, v") |
1252 | ||
1253 | (match_operand:VSX_M 1 "input_operand" | |
cb152d12 | 1254 | "wa, ZwO, wa, Y, r, r, |
05a7a104 | 1255 | eF, eQ, |
00fd0628 | 1256 | wE, jwM, ?jwM, W, <nW>, |
50c78b9a MM |
1257 | v, wZ"))] |
1258 | ||
1259 | "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) | |
1260 | && (register_operand (operands[0], <MODE>mode) | |
1261 | || register_operand (operands[1], <MODE>mode))" | |
1262 | { | |
1263 | return rs6000_output_move_128bit (operands); | |
29e6733c | 1264 | } |
50c78b9a MM |
1265 | [(set_attr "type" |
1266 | "vecstore, vecload, vecsimple, load, store, *, | |
05a7a104 | 1267 | vecperm, vecperm, |
00fd0628 | 1268 | vecsimple, vecsimple, vecsimple, *, *, |
50c78b9a | 1269 | vecstore, vecload") |
50c78b9a | 1270 | (set_attr "length" |
911c8df0 | 1271 | "*, *, *, 16, 16, 16, |
05a7a104 | 1272 | *, *, |
911c8df0 MM |
1273 | *, *, *, 20, 16, |
1274 | *, *") | |
afc69d4e | 1275 | (set_attr "isa" |
cb152d12 | 1276 | "<VSisa>, <VSisa>, <VSisa>, *, *, *, |
05a7a104 | 1277 | p10, p10, |
cb152d12 | 1278 | p9v, *, <VSisa>, *, *, |
afc69d4e | 1279 | *, *")]) |
29e6733c | 1280 | |
c9485473 MM |
1281 | ;; Explicit load/store expanders for the builtin functions |
1282 | (define_expand "vsx_load_<mode>" | |
ad18eed2 SB |
1283 | [(set (match_operand:VSX_M 0 "vsx_register_operand") |
1284 | (match_operand:VSX_M 1 "memory_operand"))] | |
c9485473 | 1285 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
06f9caed BS |
1286 | { |
1287 | /* Expand to swaps if needed, prior to swap optimization. */ | |
cb25dea3 PB |
1288 | if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR |
1289 | && !altivec_indexed_or_indirect_operand(operands[1], <MODE>mode)) | |
06f9caed BS |
1290 | { |
1291 | rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); | |
1292 | DONE; | |
1293 | } | |
1294 | }) | |
c9485473 MM |
1295 | |
1296 | (define_expand "vsx_store_<mode>" | |
ad18eed2 SB |
1297 | [(set (match_operand:VSX_M 0 "memory_operand") |
1298 | (match_operand:VSX_M 1 "vsx_register_operand"))] | |
c9485473 | 1299 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
06f9caed BS |
1300 | { |
1301 | /* Expand to swaps if needed, prior to swap optimization. */ | |
cb25dea3 PB |
1302 | if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR |
1303 | && !altivec_indexed_or_indirect_operand(operands[0], <MODE>mode)) | |
06f9caed BS |
1304 | { |
1305 | rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); | |
1306 | DONE; | |
1307 | } | |
1308 | }) | |
c9485473 | 1309 | |
b69c0061 WS |
1310 | ;; Load rightmost element from load_data |
1311 | ;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx. | |
1312 | (define_insn "vsx_lxvr<wd>x" | |
1313 | [(set (match_operand:TI 0 "vsx_register_operand" "=wa") | |
1314 | (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))] | |
1315 | "TARGET_POWER10" | |
1316 | "lxvr<wd>x %x0,%y1" | |
1317 | [(set_attr "type" "vecload")]) | |
1318 | ||
1319 | ;; Store rightmost element into store_data | |
1320 | ;; using stxvrbx, stxvrhx, strvxwx, strvxdx. | |
1321 | (define_insn "vsx_stxvr<wd>x" | |
1322 | [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z") | |
1323 | (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))] | |
1324 | "TARGET_POWER10" | |
1325 | "stxvr<wd>x %x1,%y0" | |
1326 | [(set_attr "type" "vecstore")]) | |
1327 | ||
8fa97501 BS |
1328 | ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc., |
1329 | ;; when you really want their element-reversing behavior. | |
1330 | (define_insn "vsx_ld_elemrev_v2di" | |
1331 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
1332 | (vec_select:V2DI | |
1333 | (match_operand:V2DI 1 "memory_operand" "Z") | |
1334 | (parallel [(const_int 1) (const_int 0)])))] | |
1335 | "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" | |
1336 | "lxvd2x %x0,%y1" | |
1337 | [(set_attr "type" "vecload")]) | |
1338 | ||
d10cff95 CL |
1339 | (define_insn "vsx_ld_elemrev_v1ti" |
1340 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") | |
1341 | (vec_select:V1TI | |
1342 | (match_operand:V1TI 1 "memory_operand" "Z") | |
1343 | (parallel [(const_int 0)])))] | |
1344 | "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN" | |
1345 | { | |
1346 | return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2"; | |
1347 | } | |
1348 | [(set_attr "type" "vecload")]) | |
1349 | ||
8fa97501 BS |
1350 | (define_insn "vsx_ld_elemrev_v2df" |
1351 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") | |
1352 | (vec_select:V2DF | |
1353 | (match_operand:V2DF 1 "memory_operand" "Z") | |
1354 | (parallel [(const_int 1) (const_int 0)])))] | |
1355 | "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" | |
1356 | "lxvd2x %x0,%y1" | |
1357 | [(set_attr "type" "vecload")]) | |
1358 | ||
1359 | (define_insn "vsx_ld_elemrev_v4si" | |
1360 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") | |
1361 | (vec_select:V4SI | |
1362 | (match_operand:V4SI 1 "memory_operand" "Z") | |
1363 | (parallel [(const_int 3) (const_int 2) | |
1364 | (const_int 1) (const_int 0)])))] | |
1365 | "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" | |
1366 | "lxvw4x %x0,%y1" | |
1367 | [(set_attr "type" "vecload")]) | |
1368 | ||
1369 | (define_insn "vsx_ld_elemrev_v4sf" | |
1370 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
1371 | (vec_select:V4SF | |
1372 | (match_operand:V4SF 1 "memory_operand" "Z") | |
1373 | (parallel [(const_int 3) (const_int 2) | |
1374 | (const_int 1) (const_int 0)])))] | |
1375 | "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" | |
1376 | "lxvw4x %x0,%y1" | |
1377 | [(set_attr "type" "vecload")]) | |
1378 | ||
3ef9e1ec | 1379 | (define_expand "vsx_ld_elemrev_v8hi" |
8fa97501 BS |
1380 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") |
1381 | (vec_select:V8HI | |
1382 | (match_operand:V8HI 1 "memory_operand" "Z") | |
1383 | (parallel [(const_int 7) (const_int 6) | |
1384 | (const_int 5) (const_int 4) | |
1385 | (const_int 3) (const_int 2) | |
1386 | (const_int 1) (const_int 0)])))] | |
3ef9e1ec BS |
1387 | "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" |
1388 | { | |
1389 | if (!TARGET_P9_VECTOR) | |
1390 | { | |
1391 | rtx tmp = gen_reg_rtx (V4SImode); | |
1392 | rtx subreg, subreg2, perm[16], pcv; | |
1393 | /* 2 is leftmost element in register */ | |
1394 | unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; | |
1395 | int i; | |
1396 | ||
1397 | subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0); | |
1398 | emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); | |
1399 | subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0); | |
1400 | ||
1401 | for (i = 0; i < 16; ++i) | |
1402 | perm[i] = GEN_INT (reorder[i]); | |
1403 | ||
1404 | pcv = force_reg (V16QImode, | |
1405 | gen_rtx_CONST_VECTOR (V16QImode, | |
1406 | gen_rtvec_v (16, perm))); | |
1407 | emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2, | |
1408 | subreg2, pcv)); | |
1409 | DONE; | |
1410 | } | |
1411 | }) | |
1412 | ||
1413 | (define_insn "*vsx_ld_elemrev_v8hi_internal" | |
1414 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
1415 | (vec_select:V8HI | |
1416 | (match_operand:V8HI 1 "memory_operand" "Z") | |
1417 | (parallel [(const_int 7) (const_int 6) | |
1418 | (const_int 5) (const_int 4) | |
1419 | (const_int 3) (const_int 2) | |
1420 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1421 | "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" |
1422 | "lxvh8x %x0,%y1" | |
1423 | [(set_attr "type" "vecload")]) | |
1424 | ||
3ef9e1ec | 1425 | (define_expand "vsx_ld_elemrev_v16qi" |
8fa97501 BS |
1426 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") |
1427 | (vec_select:V16QI | |
3ef9e1ec BS |
1428 | (match_operand:V16QI 1 "memory_operand" "Z") |
1429 | (parallel [(const_int 15) (const_int 14) | |
1430 | (const_int 13) (const_int 12) | |
1431 | (const_int 11) (const_int 10) | |
1432 | (const_int 9) (const_int 8) | |
1433 | (const_int 7) (const_int 6) | |
1434 | (const_int 5) (const_int 4) | |
1435 | (const_int 3) (const_int 2) | |
1436 | (const_int 1) (const_int 0)])))] | |
1437 | "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" | |
1438 | { | |
1439 | if (!TARGET_P9_VECTOR) | |
1440 | { | |
1441 | rtx tmp = gen_reg_rtx (V4SImode); | |
1442 | rtx subreg, subreg2, perm[16], pcv; | |
1443 | /* 3 is leftmost element in register */ | |
1444 | unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; | |
1445 | int i; | |
1446 | ||
1447 | subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0); | |
1448 | emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); | |
1449 | subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0); | |
1450 | ||
1451 | for (i = 0; i < 16; ++i) | |
1452 | perm[i] = GEN_INT (reorder[i]); | |
1453 | ||
1454 | pcv = force_reg (V16QImode, | |
1455 | gen_rtx_CONST_VECTOR (V16QImode, | |
1456 | gen_rtvec_v (16, perm))); | |
1457 | emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2, | |
1458 | subreg2, pcv)); | |
1459 | DONE; | |
1460 | } | |
1461 | }) | |
1462 | ||
9d36bd3b | 1463 | (define_insn "vsx_ld_elemrev_v16qi_internal" |
3ef9e1ec BS |
1464 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") |
1465 | (vec_select:V16QI | |
1466 | (match_operand:V16QI 1 "memory_operand" "Z") | |
1467 | (parallel [(const_int 15) (const_int 14) | |
1468 | (const_int 13) (const_int 12) | |
1469 | (const_int 11) (const_int 10) | |
1470 | (const_int 9) (const_int 8) | |
1471 | (const_int 7) (const_int 6) | |
1472 | (const_int 5) (const_int 4) | |
1473 | (const_int 3) (const_int 2) | |
1474 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1475 | "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" |
1476 | "lxvb16x %x0,%y1" | |
1477 | [(set_attr "type" "vecload")]) | |
1478 | ||
d10cff95 CL |
1479 | (define_insn "vsx_st_elemrev_v1ti" |
1480 | [(set (match_operand:V1TI 0 "memory_operand" "=Z") | |
1481 | (vec_select:V1TI | |
1482 | (match_operand:V1TI 1 "vsx_register_operand" "+wa") | |
1483 | (parallel [(const_int 0)]))) | |
1484 | (clobber (match_dup 1))] | |
1485 | "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" | |
1486 | { | |
1487 | return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0"; | |
1488 | } | |
1489 | [(set_attr "type" "vecstore")]) | |
1490 | ||
8fa97501 BS |
1491 | (define_insn "vsx_st_elemrev_v2df" |
1492 | [(set (match_operand:V2DF 0 "memory_operand" "=Z") | |
1493 | (vec_select:V2DF | |
3ef9e1ec BS |
1494 | (match_operand:V2DF 1 "vsx_register_operand" "wa") |
1495 | (parallel [(const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1496 | "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" |
1497 | "stxvd2x %x1,%y0" | |
1498 | [(set_attr "type" "vecstore")]) | |
1499 | ||
1500 | (define_insn "vsx_st_elemrev_v2di" | |
1501 | [(set (match_operand:V2DI 0 "memory_operand" "=Z") | |
1502 | (vec_select:V2DI | |
3ef9e1ec BS |
1503 | (match_operand:V2DI 1 "vsx_register_operand" "wa") |
1504 | (parallel [(const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1505 | "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" |
1506 | "stxvd2x %x1,%y0" | |
1507 | [(set_attr "type" "vecstore")]) | |
1508 | ||
1509 | (define_insn "vsx_st_elemrev_v4sf" | |
1510 | [(set (match_operand:V4SF 0 "memory_operand" "=Z") | |
1511 | (vec_select:V4SF | |
3ef9e1ec BS |
1512 | (match_operand:V4SF 1 "vsx_register_operand" "wa") |
1513 | (parallel [(const_int 3) (const_int 2) | |
1514 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1515 | "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" |
1516 | "stxvw4x %x1,%y0" | |
1517 | [(set_attr "type" "vecstore")]) | |
1518 | ||
1519 | (define_insn "vsx_st_elemrev_v4si" | |
1520 | [(set (match_operand:V4SI 0 "memory_operand" "=Z") | |
1521 | (vec_select:V4SI | |
1522 | (match_operand:V4SI 1 "vsx_register_operand" "wa") | |
1523 | (parallel [(const_int 3) (const_int 2) | |
1524 | (const_int 1) (const_int 0)])))] | |
1525 | "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" | |
1526 | "stxvw4x %x1,%y0" | |
1527 | [(set_attr "type" "vecstore")]) | |
1528 | ||
;; Element-reversed V8HI store.  With ISA 3.0 (Power9) the pattern is matched
;; directly by *vsx_st_elemrev_v8hi_internal (stxvh8x); otherwise the halfword
;; swap is emulated with a vperm followed by the V4SI element-reversed store.
(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
	perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
		       gen_rtx_CONST_VECTOR (V16QImode,
					     gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
						operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})
1561 | ||
d10cff95 CL |
;; NOTE(review): this P9-conditional pattern uses the same template as
;; vsx_st_elemrev_v2di above but a strictly narrower condition — confirm the
;; duplication is intentional.
(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
	(vec_select:V2DI
	  (match_operand:V2DI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])
1570 | ||
3ef9e1ec BS |
;; ISA 3.0 element-reversed V8HI store (stxvh8x).
(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])
1582 | ||
3ef9e1ec BS |
;; Element-reversed V16QI store.  Matched directly (stxvb16x) on Power9;
;; otherwise emulated with a vperm byte shuffle plus the V4SI reversed store.
(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 15) (const_int 14)
		     (const_int 13) (const_int 12)
		     (const_int 11) (const_int 10)
		     (const_int 9) (const_int 8)
		     (const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
	perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
		       gen_rtx_CONST_VECTOR (V16QImode,
					     gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
						 operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})
1619 | ||
;; ISA 3.0 element-reversed V16QI store (stxvb16x).
(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 15) (const_int 14)
		     (const_int 13) (const_int 12)
		     (const_int 11) (const_int 10)
		     (const_int 9) (const_int 8)
		     (const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])
1635 | ||
29e6733c | 1636 | \f |
0609bdf2 MM |
1637 | ;; VSX vector floating point arithmetic instructions. The VSX scalar |
1638 | ;; instructions are now combined with the insn for the traditional floating | |
1639 | ;; point unit. | |
;; Vector FP add (xvaddsp/xvadddp via <sd> mode attribute).
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1647 | |
;; Vector FP subtract (xvsubsp/xvsubdp via <sd> mode attribute).
;; Fix: the output constraint read "=wa>" — the stray '>' is an auto-increment
;; *memory* constraint letter and is invalid on a register operand; every
;; sibling pattern (add/mul/div) uses plain "=wa".
(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1655 | |
;; Vector FP multiply (xvmulsp/xvmuldp).
(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c | 1663 | |
2ccdda19 BS |
; Emulate vector with scalar for vec_mul in V2DImode: Power10 has vmulld;
; otherwise extract both doubleword lanes, multiply as scalars, and
; re-concatenate.
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
		      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];

  if (TARGET_POWER10)
    emit_insn (gen_mulv2di3 (op0, op1, op2) );

  else
    {
      rtx op3 = gen_reg_rtx (DImode);
      rtx op4 = gen_reg_rtx (DImode);
      rtx op5 = gen_reg_rtx (DImode);
      emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
      emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
      if (TARGET_POWERPC64)
	emit_insn (gen_muldi3 (op5, op3, op4));
      else
	{
	  rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
	  emit_move_insn (op5, ret);
	}
      emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
      emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
      if (TARGET_POWERPC64)
	emit_insn (gen_muldi3 (op3, op3, op4));
      else
	{
	  rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
	  emit_move_insn (op3, ret);
	}
      emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
    }
  DONE;
}
  [(set_attr "type" "mul")])
1710 | ||
;; Vector FP divide (xvdivsp/xvdivdp).
(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])
29e6733c | 1718 | |
2ccdda19 BS |
; Emulate vector with scalar for vec_div in V2DImode: extract both lanes,
; divide as scalars (divdi3, or the libgcc sdiv routine on 32-bit), and
; re-concatenate.
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
		      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
					    op5, LCT_NORMAL, DImode,
					    op3, DImode,
					    op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
					    op3, LCT_NORMAL, DImode,
					    op3, DImode,
					    op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])
1766 | ||
; Unsigned V2DI divide: vdivud on Power10, otherwise scalar emulation per
; lane (udivdi3, or the libgcc udiv routine on 32-bit).
(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
		      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];

  if (TARGET_POWER10)
    emit_insn (gen_udivv2di3 (op0, op1, op2) );
  else
    {
      rtx op3 = gen_reg_rtx (DImode);
      rtx op4 = gen_reg_rtx (DImode);
      rtx op5 = gen_reg_rtx (DImode);

      emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
      emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));

      if (TARGET_POWERPC64)
	emit_insn (gen_udivdi3 (op5, op3, op4));
      else
	{
	  rtx libfunc = optab_libfunc (udiv_optab, DImode);
	  rtx target = emit_library_call_value (libfunc,
						op5, LCT_NORMAL, DImode,
						op3, DImode,
						op4, DImode);
	  emit_move_insn (op5, target);
	}
      emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
      emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));

      if (TARGET_POWERPC64)
	emit_insn (gen_udivdi3 (op3, op3, op4));
      else
	{
	  rtx libfunc = optab_libfunc (udiv_optab, DImode);
	  rtx target = emit_library_call_value (libfunc,
						op3, LCT_NORMAL, DImode,
						op3, DImode,
						op4, DImode);
	  emit_move_insn (op3, target);
	}
      emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
    }
  DONE;
}
  [(set_attr "type" "div")])
1822 | ||
f03122f2 CL |
;; Vector integer signed/unsigned divide
;; 128-bit signed divide (ISA 3.1).
(define_insn "vsx_div_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
		      (match_operand:V1TI 2 "vsx_register_operand" "v")]
		     UNSPEC_VSX_DIVSQ))]
  "TARGET_POWER10"
  "vdivsq %0,%1,%2"
  [(set_attr "type" "div")])
1832 | ||
;; 128-bit unsigned divide (ISA 3.1).
(define_insn "vsx_udiv_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
		      (match_operand:V1TI 2 "vsx_register_operand" "v")]
		     UNSPEC_VSX_DIVUQ))]
  "TARGET_POWER10"
  "vdivuq %0,%1,%2"
  [(set_attr "type" "div")])
1841 | ||
;; 128-bit signed divide-extended (ISA 3.1).
(define_insn "vsx_dives_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
		      (match_operand:V1TI 2 "vsx_register_operand" "v")]
		     UNSPEC_VSX_DIVESQ))]
  "TARGET_POWER10"
  "vdivesq %0,%1,%2"
  [(set_attr "type" "div")])
1850 | ||
;; 128-bit unsigned divide-extended (ISA 3.1).
(define_insn "vsx_diveu_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
		      (match_operand:V1TI 2 "vsx_register_operand" "v")]
		     UNSPEC_VSX_DIVEUQ))]
  "TARGET_POWER10"
  "vdiveuq %0,%1,%2"
  [(set_attr "type" "div")])
1859 | ||
;; 128-bit signed modulo (ISA 3.1).
(define_insn "vsx_mods_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
		      (match_operand:V1TI 2 "vsx_register_operand" "v")]
		     UNSPEC_VSX_MODSQ))]
  "TARGET_POWER10"
  "vmodsq %0,%1,%2"
  [(set_attr "type" "div")])
1868 | ||
;; 128-bit unsigned modulo (ISA 3.1).
(define_insn "vsx_modu_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
	(unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
		      (match_operand:V1TI 2 "vsx_register_operand" "v")]
		     UNSPEC_VSX_MODUQ))]
  "TARGET_POWER10"
  "vmoduq %0,%1,%2"
  [(set_attr "type" "div")])
1877 | ||
29e6733c MM |
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
		      (match_operand:VSX_B 2 "vsx_register_operand")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(gt:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})
1891 | ||
;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
		      (match_operand:VSX_B 2 "vsx_register_operand")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})
1905 | ||
;; Test-for-software-divide, result in a CCFP register.
(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
		      (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<sd>p %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1914 | |
;; Vector FP reciprocal estimate (xvresp/xvredp).
(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
		      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1922 | |
;; Vector FP negate.
(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1929 | |
;; Vector FP absolute value.
(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1936 | |
;; Vector FP negative absolute value.
(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(neg:VSX_F
	 (abs:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1945 | |
;; Vector FP maximum.
(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1953 | |
;; Vector FP minimum.
(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1961 | |
;; Vector FP square root.
(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<sd>p %x0,%x1"
  [(set_attr "type" "<sd>sqrt")])
29e6733c | 1968 | |
;; Vector FP reciprocal square root estimate.
(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
		      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
1976 | |
;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 2)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(gt:SI (match_dup 2)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})
1989 | ||
;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(eq:SI (match_dup 2)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})
2002 | ||
;; Test-for-software-square-root, result in a CCFP register.
(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<sd>p %0,%x1"
  [(set_attr "type" "<VStype_simple>")])
29e6733c | 2010 | |
0609bdf2 MM |
;; VSX vector fused multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allow the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiply operand.
c36193c6 MM |
2015 | |
;; V4SF fused multiply-add; third alternative is the classical Altivec
;; vmaddfp, which allows the target to differ from all three inputs.
(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
	(fma:V4SF
	  (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
	  (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
	  (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
2028 | ||
;; V2DF fused multiply-add (VSX only; no Altivec double form).
(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
	(fma:V2DF
	  (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
	  (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
	  (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])
c36193c6 | 2040 | |
;; Vector fused multiply-subtract: a*b - c.
(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
	(fma:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
	  (neg:VSX_F
	    (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<sd>p %x0,%x1,%x2
   xvmsubm<sd>p %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])
c36193c6 | 2053 | |
;; Vector negated fused multiply-add: -(a*b + c).
(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
	(neg:VSX_F
	 (fma:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
	  (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<sd>p %x0,%x1,%x2
   xvnmaddm<sd>p %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])
29e6733c | 2066 | |
;; V4SF negated fused multiply-subtract: -(a*b - c); third alternative is
;; the classical Altivec vnmsubfp.
(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v")
	(neg:V4SF
	 (fma:V4SF
	   (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v")
	   (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v")
	   (neg:V4SF
	     (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
2081 | ||
;; V2DF negated fused multiply-subtract: -(a*b - c).
(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
	(neg:V2DF
	 (fma:V2DF
	   (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
	   (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
	   (neg:V2DF
	     (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])
29e6733c | 2095 | |
29e6733c MM |
;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
2104 | |
;; Vector FP greater-than compare producing a mask.
(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
2112 | |
;; Vector FP greater-or-equal compare producing a mask.
(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		  (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c | 2120 | |
29e6733c MM |
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(eq:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
2135 | |
;; Record-form greater-than compare: vector mask plus CR6 summary.
(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(gt:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c | 2148 | |
cf5d0fc2 WS |
;; xvtlsbb BF,XB
;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
(define_insn "*xvtlsbb_internal"
  [(set (match_operand:CC 0 "cc_reg_operand" "=y")
	(unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
	 UNSPEC_XVTLSBB))]
  "TARGET_POWER10"
  "xvtlsbb %0,%x1"
  [(set_attr "type" "logical")])
2159 | ||
;; Vector Test Least Significant Bit by Byte
;; for the implementation of the builtin
;;     __builtin_vec_test_lsbb_all_ones
;;     int vec_test_lsbb_all_ones (vector unsigned char);
;; and
;;     __builtin_vec_test_lsbb_all_zeros
;;     int vec_test_lsbb_all_zeros (vector unsigned char);
(define_expand "xvtlsbbo"
  [(set (match_dup 2)
	(unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
	 UNSPEC_XVTLSBB))
   (set (match_operand:SI 0 "gpc_reg_operand" "=r")
	(lt:SI (match_dup 2) (const_int 0)))]
  "TARGET_POWER10"
{
  operands[2] = gen_reg_rtx (CCmode);
})
;; All-zeros variant of xvtlsbb: EQ bit of the CR result.
(define_expand "xvtlsbbz"
  [(set (match_dup 2)
	(unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
	 UNSPEC_XVTLSBB))
   (set (match_operand:SI 0 "gpc_reg_operand" "=r")
	(eq:SI (match_dup 2) (const_int 0)))]
  "TARGET_POWER10"
{
  operands[2] = gen_reg_rtx (CCmode);
})
2187 | ||
;; Record-form greater-or-equal compare: vector mask plus CR6 summary.
(define_insn "*vsx_ge_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
		 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(ge:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<sd>p. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
2200 | |
;; Vector select
(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
	(if_then_else:VSX_L
	 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
		(match_operand:VSX_L 4 "zero_constant" ""))
	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")
   (set_attr "isa" "<VSisa>")])
29e6733c MM |
2213 | |
;; Vector select, unsigned-compare flavor (CCUNS).
(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
	(if_then_else:VSX_L
	 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
		   (match_operand:VSX_L 4 "zero_constant" ""))
	 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
	 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")
   (set_attr "isa" "<VSisa>")])
29e6733c MM |
2225 | |
;; Copy sign
;; Note the operand order: xvcpsgn takes the sign source first, so %x2,%x1.
(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
	 UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<sd>p %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")])
29e6733c MM |
2236 | |
2237 | ;; For the conversions, limit the register class for the integer value to be | |
2238 | ;; the fprs because we don't want to add the altivec registers to movdi/movsi. | |
2239 | ;; For the unsigned tests, there isn't a generic double -> unsigned conversion | |
2240 | ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. | |
8a480dc3 AM |
2241 | ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md |
2242 | ;; in allowing virtual registers. | |
29e6733c | 2243 | (define_insn "vsx_float<VSi><mode>2" |
012f609e SB |
2244 | [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa") |
2245 | (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2246 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2247 | "xvcvsx<VSc><sd>p %x0,%x1" |
2c2aa74d | 2248 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2249 | |
2250 | (define_insn "vsx_floatuns<VSi><mode>2" | |
012f609e SB |
2251 | [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa") |
2252 | (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2253 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2254 | "xvcvux<VSc><sd>p %x0,%x1" |
2c2aa74d | 2255 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2256 | |
2257 | (define_insn "vsx_fix_trunc<mode><VSi>2" | |
012f609e SB |
2258 | [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa") |
2259 | (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2260 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2261 | "x<VSv>cv<sd>psx<VSc>s %x0,%x1" |
2c2aa74d | 2262 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2263 | |
2264 | (define_insn "vsx_fixuns_trunc<mode><VSi>2" | |
012f609e SB |
2265 | [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa") |
2266 | (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2267 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2268 | "x<VSv>cv<sd>pux<VSc>s %x0,%x1" |
2c2aa74d | 2269 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2270 | |
2271 | ;; Math rounding functions | |
6cc8f683 | 2272 | (define_insn "vsx_x<VSv>r<sd>pi" |
012f609e SB |
2273 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa") |
2274 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2275 | UNSPEC_VSX_ROUND_I))] |
2276 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2277 | "x<VSv>r<sd>pi %x0,%x1" |
2c2aa74d | 2278 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c | 2279 | |
6cc8f683 | 2280 | (define_insn "vsx_x<VSv>r<sd>pic" |
012f609e SB |
2281 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa") |
2282 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2283 | UNSPEC_VSX_ROUND_IC))] |
2284 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2285 | "x<VSv>r<sd>pic %x0,%x1" |
2c2aa74d | 2286 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2287 | |
2288 | (define_insn "vsx_btrunc<mode>2" | |
012f609e SB |
2289 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2290 | (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] | |
29e6733c | 2291 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2292 | "xvr<sd>piz %x0,%x1" |
2c2aa74d | 2293 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2294 | |
2295 | (define_insn "*vsx_b2trunc<mode>2" | |
012f609e SB |
2296 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa") |
2297 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2298 | UNSPEC_FRIZ))] |
2299 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2300 | "x<VSv>r<sd>piz %x0,%x1" |
2c2aa74d | 2301 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2302 | |
2303 | (define_insn "vsx_floor<mode>2" | |
012f609e SB |
2304 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2305 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2306 | UNSPEC_FRIM))] |
2307 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2308 | "xvr<sd>pim %x0,%x1" |
2c2aa74d | 2309 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2310 | |
2311 | (define_insn "vsx_ceil<mode>2" | |
012f609e SB |
2312 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2313 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2314 | UNSPEC_FRIP))] |
2315 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2316 | "xvr<sd>pip %x0,%x1" |
2c2aa74d | 2317 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2318 | |
2319 | \f | |
2320 | ;; VSX convert to/from double vector | |
2321 | ||
2322 | ;; Convert between single and double precision | |
2323 | ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal | |
2324 | ;; scalar single precision instructions internally use the double format. | |
2325 | ;; Prefer the altivec registers, since we likely will need to do a vperm | |
1f5aa628 SB |
2326 | (define_insn "vsx_xscvdpsp" |
2327 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa") | |
2328 | (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")] | |
29e6733c | 2329 | UNSPEC_VSX_CVSPDP))] |
1f5aa628 SB |
2330 | "VECTOR_UNIT_VSX_P (DFmode)" |
2331 | "xscvdpsp %x0,%x1" | |
2332 | [(set_attr "type" "fp")]) | |
2333 | ||
6485d5d6 | 2334 | (define_insn "vsx_xvcvspdp_be" |
1f5aa628 | 2335 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa") |
6485d5d6 KL |
2336 | (float_extend:V2DF |
2337 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2338 | (parallel [(const_int 0) (const_int 2)]))))] | |
2339 | "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN" | |
2340 | "xvcvspdp %x0,%x1" | |
2341 | [(set_attr "type" "vecdouble")]) | |
2342 | ||
2343 | (define_insn "vsx_xvcvspdp_le" | |
2344 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa") | |
2345 | (float_extend:V2DF | |
2346 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2347 | (parallel [(const_int 1) (const_int 3)]))))] | |
2348 | "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" | |
1f5aa628 SB |
2349 | "xvcvspdp %x0,%x1" |
2350 | [(set_attr "type" "vecdouble")]) | |
2351 | ||
6485d5d6 KL |
2352 | (define_expand "vsx_xvcvspdp" |
2353 | [(match_operand:V2DF 0 "vsx_register_operand") | |
2354 | (match_operand:V4SF 1 "vsx_register_operand")] | |
2355 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2356 | { | |
2357 | if (BYTES_BIG_ENDIAN) | |
2358 | emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1])); | |
2359 | else | |
2360 | emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1])); | |
2361 | DONE; | |
2362 | }) | |
2363 | ||
1f5aa628 SB |
2364 | (define_insn "vsx_xvcvdpsp" |
2365 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa") | |
2366 | (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")] | |
2367 | UNSPEC_VSX_CVSPDP))] | |
2368 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2369 | "xvcvdpsp %x0,%x1" | |
2370 | [(set_attr "type" "vecdouble")]) | |
29e6733c MM |
2371 | |
2372 | ;; xscvspdp, represent the scalar SF type as V4SF | |
2373 | (define_insn "vsx_xscvspdp" | |
cc998fd5 | 2374 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
59f5868d | 2375 | (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] |
29e6733c | 2376 | UNSPEC_VSX_CVSPDP))] |
df5a9a7c | 2377 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
29e6733c MM |
2378 | "xscvspdp %x0,%x1" |
2379 | [(set_attr "type" "fp")]) | |
2380 | ||
2f448503 MM |
2381 | ;; Same as vsx_xscvspdp, but use SF as the type |
2382 | (define_insn "vsx_xscvspdp_scalar2" | |
72e3386e | 2383 | [(set (match_operand:SF 0 "vsx_register_operand" "=wa") |
2f448503 MM |
2384 | (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] |
2385 | UNSPEC_VSX_CVSPDP))] | |
2386 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2387 | "xscvspdp %x0,%x1" | |
2388 | [(set_attr "type" "fp")]) | |
2389 | ||
26bca0ed CL |
2390 | ;; Generate xvcvhpsp instruction |
2391 | (define_insn "vsx_xvcvhpsp" | |
2392 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
2393 | (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")] | |
2394 | UNSPEC_VSX_CVHPSP))] | |
2395 | "TARGET_P9_VECTOR" | |
2396 | "xvcvhpsp %x0,%x1" | |
2397 | [(set_attr "type" "vecfloat")]) | |
2398 | ||
58b475a2 WS |
2399 | ;; Generate xvcvsphp |
2400 | (define_insn "vsx_xvcvsphp" | |
2401 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
2402 | (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")] | |
2403 | UNSPEC_VSX_XVCVSPHP))] | |
2404 | "TARGET_P9_VECTOR" | |
2405 | "xvcvsphp %x0,%x1" | |
2406 | [(set_attr "type" "vecfloat")]) | |
2407 | ||
29e6733c MM |
2408 | ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF |
2409 | ;; format of scalars is actually DF. | |
2410 | (define_insn "vsx_xscvdpsp_scalar" | |
2411 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
72e3386e | 2412 | (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] |
29e6733c | 2413 | UNSPEC_VSX_CVSPDP))] |
df5a9a7c | 2414 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
29e6733c MM |
2415 | "xscvdpsp %x0,%x1" |
2416 | [(set_attr "type" "fp")]) | |
2417 | ||
0bd62dca MM |
2418 | ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs |
2419 | (define_insn "vsx_xscvdpspn" | |
72e3386e | 2420 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
cc998fd5 | 2421 | (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2422 | UNSPEC_VSX_CVDPSPN))] |
2423 | "TARGET_XSCVDPSPN" | |
2424 | "xscvdpspn %x0,%x1" | |
2425 | [(set_attr "type" "fp")]) | |
2426 | ||
2427 | (define_insn "vsx_xscvspdpn" | |
cc998fd5 | 2428 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
57e6b981 | 2429 | (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2430 | UNSPEC_VSX_CVSPDPN))] |
2431 | "TARGET_XSCVSPDPN" | |
2432 | "xscvspdpn %x0,%x1" | |
2433 | [(set_attr "type" "fp")]) | |
2434 | ||
2435 | (define_insn "vsx_xscvdpspn_scalar" | |
57e6b981 | 2436 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
72e3386e | 2437 | (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2438 | UNSPEC_VSX_CVDPSPN))] |
2439 | "TARGET_XSCVDPSPN" | |
2440 | "xscvdpspn %x0,%x1" | |
2441 | [(set_attr "type" "fp")]) | |
2442 | ||
2443 | ;; Used by direct move to move a SFmode value from GPR to VSX register | |
2444 | (define_insn "vsx_xscvspdpn_directmove" | |
2445 | [(set (match_operand:SF 0 "vsx_register_operand" "=wa") | |
b306ab3a | 2446 | (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2447 | UNSPEC_VSX_CVSPDPN))] |
2448 | "TARGET_XSCVSPDPN" | |
2449 | "xscvspdpn %x0,%x1" | |
2450 | [(set_attr "type" "fp")]) | |
2451 | ||
70f0f8b2 BS |
2452 | ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long) |
2453 | ||
4d8cfe0e KL |
2454 | (define_insn "vsx_xvcv<su>xwsp" |
2455 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
2456 | (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))] | |
2457 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2458 | "xvcv<su>xwsp %x0,%x1" | |
2459 | [(set_attr "type" "vecfloat")]) | |
2460 | ||
2461 | (define_insn "vsx_xvcv<su>xddp" | |
2462 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") | |
2463 | (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))] | |
2464 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2465 | "xvcv<su>xddp %x0,%x1" | |
2466 | [(set_attr "type" "vecdouble")]) | |
2467 | ||
2468 | (define_insn "vsx_xvcvsp<su>xws" | |
2469 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") | |
2470 | (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))] | |
2471 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2472 | "xvcvsp<su>xws %x0,%x1" | |
2473 | [(set_attr "type" "vecfloat")]) | |
2474 | ||
2475 | (define_insn "vsx_xvcvdp<su>xds" | |
2476 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
2477 | (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))] | |
2478 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2479 | "xvcvdp<su>xds %x0,%x1" | |
2480 | [(set_attr "type" "vecdouble")]) | |
2481 | ||
70f0f8b2 | 2482 | (define_expand "vsx_xvcvsxddp_scale" |
ad18eed2 SB |
2483 | [(match_operand:V2DF 0 "vsx_register_operand") |
2484 | (match_operand:V2DI 1 "vsx_register_operand") | |
2485 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2486 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2487 | { | |
2488 | rtx op0 = operands[0]; | |
2489 | rtx op1 = operands[1]; | |
2490 | int scale = INTVAL(operands[2]); | |
2491 | emit_insn (gen_vsx_xvcvsxddp (op0, op1)); | |
2492 | if (scale != 0) | |
2493 | rs6000_scale_v2df (op0, op0, -scale); | |
2494 | DONE; | |
2495 | }) | |
2496 | ||
70f0f8b2 | 2497 | (define_expand "vsx_xvcvuxddp_scale" |
ad18eed2 SB |
2498 | [(match_operand:V2DF 0 "vsx_register_operand") |
2499 | (match_operand:V2DI 1 "vsx_register_operand") | |
2500 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2501 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2502 | { | |
2503 | rtx op0 = operands[0]; | |
2504 | rtx op1 = operands[1]; | |
2505 | int scale = INTVAL(operands[2]); | |
2506 | emit_insn (gen_vsx_xvcvuxddp (op0, op1)); | |
2507 | if (scale != 0) | |
2508 | rs6000_scale_v2df (op0, op0, -scale); | |
2509 | DONE; | |
2510 | }) | |
2511 | ||
70f0f8b2 | 2512 | (define_expand "vsx_xvcvdpsxds_scale" |
ad18eed2 SB |
2513 | [(match_operand:V2DI 0 "vsx_register_operand") |
2514 | (match_operand:V2DF 1 "vsx_register_operand") | |
2515 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2516 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2517 | { | |
2518 | rtx op0 = operands[0]; | |
2519 | rtx op1 = operands[1]; | |
9b5ee426 BS |
2520 | rtx tmp; |
2521 | int scale = INTVAL (operands[2]); | |
2522 | if (scale == 0) | |
2523 | tmp = op1; | |
2524 | else | |
2525 | { | |
2526 | tmp = gen_reg_rtx (V2DFmode); | |
2527 | rs6000_scale_v2df (tmp, op1, scale); | |
2528 | } | |
70f0f8b2 BS |
2529 | emit_insn (gen_vsx_xvcvdpsxds (op0, tmp)); |
2530 | DONE; | |
2531 | }) | |
2532 | ||
e5898daf CL |
2533 | ;; convert vector of 64-bit floating point numbers to vector of |
2534 | ;; 64-bit unsigned integer | |
70f0f8b2 | 2535 | (define_expand "vsx_xvcvdpuxds_scale" |
ad18eed2 SB |
2536 | [(match_operand:V2DI 0 "vsx_register_operand") |
2537 | (match_operand:V2DF 1 "vsx_register_operand") | |
2538 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2539 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2540 | { | |
2541 | rtx op0 = operands[0]; | |
2542 | rtx op1 = operands[1]; | |
9b5ee426 BS |
2543 | rtx tmp; |
2544 | int scale = INTVAL (operands[2]); | |
2545 | if (scale == 0) | |
2546 | tmp = op1; | |
2547 | else | |
2548 | { | |
2549 | tmp = gen_reg_rtx (V2DFmode); | |
2550 | rs6000_scale_v2df (tmp, op1, scale); | |
2551 | } | |
70f0f8b2 BS |
2552 | emit_insn (gen_vsx_xvcvdpuxds (op0, tmp)); |
2553 | DONE; | |
2554 | }) | |
2555 | ||
29e6733c MM |
2556 | ;; Convert from 64-bit to 32-bit types |
2557 | ;; Note, favor the Altivec registers since the usual use of these instructions | |
2558 | ;; is in vector converts and we need to use the Altivec vperm instruction. | |
2559 | ||
2560 | (define_insn "vsx_xvcvdpsxws" | |
2561 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") | |
85949949 | 2562 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")] |
29e6733c MM |
2563 | UNSPEC_VSX_CVDPSXWS))] |
2564 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2565 | "xvcvdpsxws %x0,%x1" | |
4356b75d | 2566 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
2567 | |
2568 | (define_insn "vsx_xvcvdpuxws" | |
2569 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") | |
85949949 | 2570 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")] |
29e6733c MM |
2571 | UNSPEC_VSX_CVDPUXWS))] |
2572 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2573 | "xvcvdpuxws %x0,%x1" | |
4356b75d | 2574 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
2575 | |
2576 | (define_insn "vsx_xvcvsxdsp" | |
8d3620ba SB |
2577 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
2578 | (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2579 | UNSPEC_VSX_CVSXDSP))] |
2580 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2581 | "xvcvsxdsp %x0,%x1" | |
2582 | [(set_attr "type" "vecfloat")]) | |
2583 | ||
2584 | (define_insn "vsx_xvcvuxdsp" | |
8d3620ba SB |
2585 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
2586 | (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2587 | UNSPEC_VSX_CVUXDSP))] |
2588 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
8722316b | 2589 | "xvcvuxdsp %x0,%x1" |
4356b75d | 2590 | [(set_attr "type" "vecdouble")]) |
29e6733c | 2591 | |
6485d5d6 KL |
2592 | ;; Convert vector of 32-bit signed/unsigned integers to vector of |
2593 | ;; 64-bit floating point numbers. | |
2594 | (define_insn "vsx_xvcv<su>xwdp_be" | |
8d3620ba | 2595 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") |
6485d5d6 KL |
2596 | (any_float:V2DF |
2597 | (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa") | |
2598 | (parallel [(const_int 0) (const_int 2)]))))] | |
2599 | "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN" | |
2600 | "xvcv<su>xwdp %x0,%x1" | |
4356b75d | 2601 | [(set_attr "type" "vecdouble")]) |
29e6733c | 2602 | |
6485d5d6 KL |
2603 | (define_insn "vsx_xvcv<su>xwdp_le" |
2604 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") | |
2605 | (any_float:V2DF | |
2606 | (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa") | |
2607 | (parallel [(const_int 1) (const_int 3)]))))] | |
2608 | "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" | |
2609 | "xvcv<su>xwdp %x0,%x1" | |
2610 | [(set_attr "type" "vecdouble")]) | |
2611 | ||
2612 | (define_expand "vsx_xvcv<su>xwdp" | |
2613 | [(match_operand:V2DF 0 "vsx_register_operand") | |
2614 | (match_operand:V4SI 1 "vsx_register_operand") | |
2615 | (any_float (pc))] | |
2616 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2617 | { | |
2618 | if (BYTES_BIG_ENDIAN) | |
2619 | emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1])); | |
2620 | else | |
2621 | emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1])); | |
2622 | DONE; | |
2623 | }) | |
2624 | ||
156b5cca | 2625 | (define_insn "vsx_xvcvsxwdp_df" |
cc998fd5 | 2626 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
156b5cca MM |
2627 | (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] |
2628 | UNSPEC_VSX_CVSXWDP))] | |
2629 | "TARGET_VSX" | |
2630 | "xvcvsxwdp %x0,%x1" | |
2631 | [(set_attr "type" "vecdouble")]) | |
2632 | ||
156b5cca | 2633 | (define_insn "vsx_xvcvuxwdp_df" |
cc998fd5 | 2634 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
156b5cca MM |
2635 | (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] |
2636 | UNSPEC_VSX_CVUXWDP))] | |
2637 | "TARGET_VSX" | |
2638 | "xvcvuxwdp %x0,%x1" | |
2639 | [(set_attr "type" "vecdouble")]) | |
2640 | ||
6485d5d6 KL |
2641 | ;; Convert vector of 32-bit floating point numbers to vector of |
2642 | ;; 64-bit signed/unsigned integers. | |
2643 | (define_insn "vsx_xvcvsp<su>xds_be" | |
29e6733c | 2644 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") |
6485d5d6 KL |
2645 | (any_fix:V2DI |
2646 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2647 | (parallel [(const_int 0) (const_int 2)]))))] | |
2648 | "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN" | |
2649 | "xvcvsp<su>xds %x0,%x1" | |
4356b75d | 2650 | [(set_attr "type" "vecdouble")]) |
29e6733c | 2651 | |
6485d5d6 | 2652 | (define_insn "vsx_xvcvsp<su>xds_le" |
29e6733c | 2653 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") |
6485d5d6 KL |
2654 | (any_fix:V2DI |
2655 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2656 | (parallel [(const_int 1) (const_int 3)]))))] | |
2657 | "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" | |
2658 | "xvcvsp<su>xds %x0,%x1" | |
4356b75d | 2659 | [(set_attr "type" "vecdouble")]) |
688e4919 | 2660 | |
6485d5d6 KL |
2661 | (define_expand "vsx_xvcvsp<su>xds" |
2662 | [(match_operand:V2DI 0 "vsx_register_operand") | |
2663 | (match_operand:V4SF 1 "vsx_register_operand") | |
2664 | (any_fix (pc))] | |
2665 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2666 | { | |
2667 | if (BYTES_BIG_ENDIAN) | |
2668 | emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1])); | |
2669 | else | |
2670 | emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1])); | |
2671 | DONE; | |
2672 | }) | |
2673 | ||
19d22f7c CL |
2674 | ;; Generate float2 double |
2675 | ;; convert two double to float | |
2676 | (define_expand "float2_v2df" | |
2677 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2678 | (use (match_operand:V2DF 1 "register_operand" "wa")) | |
2679 | (use (match_operand:V2DF 2 "register_operand" "wa"))] | |
2680 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2681 | { | |
2682 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2683 | ||
2684 | rtx_dst = operands[0]; | |
2685 | rtx_src1 = operands[1]; | |
2686 | rtx_src2 = operands[2]; | |
2687 | ||
2688 | rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2); | |
2689 | DONE; | |
2690 | }) | |
2691 | ||
be1418c7 CL |
2692 | ;; Generate float2 |
2693 | ;; convert two long long signed ints to float | |
2694 | (define_expand "float2_v2di" | |
2695 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2696 | (use (match_operand:V2DI 1 "register_operand" "wa")) | |
2697 | (use (match_operand:V2DI 2 "register_operand" "wa"))] | |
2698 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2699 | { | |
2700 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2701 | ||
2702 | rtx_dst = operands[0]; | |
2703 | rtx_src1 = operands[1]; | |
2704 | rtx_src2 = operands[2]; | |
2705 | ||
2706 | rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2); | |
2707 | DONE; | |
2708 | }) | |
2709 | ||
2710 | ;; Generate uns_float2 | |
2711 | ;; convert two long long unsigned ints to float | |
2712 | (define_expand "uns_float2_v2di" | |
2713 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2714 | (use (match_operand:V2DI 1 "register_operand" "wa")) | |
2715 | (use (match_operand:V2DI 2 "register_operand" "wa"))] | |
2716 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2717 | { | |
2718 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2719 | ||
2720 | rtx_dst = operands[0]; | |
2721 | rtx_src1 = operands[1]; | |
2722 | rtx_src2 = operands[2]; | |
2723 | ||
2724 | rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2); | |
2725 | DONE; | |
2726 | }) | |
2727 | ||
2728 | ;; Generate floate | |
2729 | ;; convert double or long long signed to float | |
2730 | ;; (Only even words are valid, BE numbering) | |
2731 | (define_expand "floate<mode>" | |
2732 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2733 | (use (match_operand:VSX_D 1 "register_operand" "wa"))] | |
2734 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2735 | { | |
427a7384 | 2736 | if (BYTES_BIG_ENDIAN) |
be1418c7 CL |
2737 | { |
2738 | /* Shift left one word to put even word correct location */ | |
2739 | rtx rtx_tmp; | |
2740 | rtx rtx_val = GEN_INT (4); | |
2741 | ||
2742 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
2743 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1])); | |
2744 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], | |
2745 | rtx_tmp, rtx_tmp, rtx_val)); | |
2746 | } | |
2747 | else | |
394a527f | 2748 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1])); |
be1418c7 CL |
2749 | |
2750 | DONE; | |
2751 | }) | |
2752 | ||
2753 | ;; Generate uns_floate | |
2754 | ;; convert long long unsigned to float | |
2755 | ;; (Only even words are valid, BE numbering) | |
2756 | (define_expand "unsfloatev2di" | |
2757 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2758 | (use (match_operand:V2DI 1 "register_operand" "wa"))] | |
2759 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2760 | { | |
427a7384 | 2761 | if (BYTES_BIG_ENDIAN) |
be1418c7 CL |
2762 | { |
2763 | /* Shift left one word to put even word correct location */ | |
2764 | rtx rtx_tmp; | |
2765 | rtx rtx_val = GEN_INT (4); | |
2766 | ||
2767 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
2768 | emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1])); | |
2769 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], | |
2770 | rtx_tmp, rtx_tmp, rtx_val)); | |
2771 | } | |
2772 | else | |
2773 | emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1])); | |
2774 | ||
2775 | DONE; | |
2776 | }) | |
2777 | ||
2778 | ;; Generate floato | |
2779 | ;; convert double or long long signed to float | |
2780 | ;; Only odd words are valid, BE numbering) | |
2781 | (define_expand "floato<mode>" | |
2782 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2783 | (use (match_operand:VSX_D 1 "register_operand" "wa"))] | |
2784 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2785 | { | |
427a7384 | 2786 | if (BYTES_BIG_ENDIAN) |
394a527f | 2787 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1])); |
be1418c7 CL |
2788 | else |
2789 | { | |
2790 | /* Shift left one word to put odd word correct location */ | |
2791 | rtx rtx_tmp; | |
2792 | rtx rtx_val = GEN_INT (4); | |
2793 | ||
2794 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
394a527f | 2795 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1])); |
be1418c7 CL |
2796 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], |
2797 | rtx_tmp, rtx_tmp, rtx_val)); | |
2798 | } | |
2799 | DONE; | |
2800 | }) | |
2801 | ||
2802 | ;; Generate uns_floato | |
2803 | ;; convert long long unsigned to float | |
2804 | ;; (Only odd words are valid, BE numbering) | |
2805 | (define_expand "unsfloatov2di" | |
2806 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2807 | (use (match_operand:V2DI 1 "register_operand" "wa"))] | |
2808 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2809 | { | |
427a7384 | 2810 | if (BYTES_BIG_ENDIAN) |
be1418c7 CL |
2811 | emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1])); |
2812 | else | |
2813 | { | |
2814 | /* Shift left one word to put odd word correct location */ | |
2815 | rtx rtx_tmp; | |
2816 | rtx rtx_val = GEN_INT (4); | |
2817 | ||
2818 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
2819 | emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1])); | |
2820 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], | |
2821 | rtx_tmp, rtx_tmp, rtx_val)); | |
2822 | } | |
2823 | DONE; | |
2824 | }) | |
2825 | ||
e5898daf CL |
2826 | ;; Generate vsigned2 |
2827 | ;; convert two double float vectors to a vector of single precision ints | |
2828 | (define_expand "vsigned2_v2df" | |
2829 | [(match_operand:V4SI 0 "register_operand" "=wa") | |
2830 | (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa") | |
2831 | (match_operand:V2DF 2 "register_operand" "wa")] | |
2832 | UNSPEC_VSX_VSIGNED2)] | |
2833 | "TARGET_VSX" | |
2834 | { | |
2835 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2836 | bool signed_convert=true; | |
2837 | ||
2838 | rtx_dst = operands[0]; | |
2839 | rtx_src1 = operands[1]; | |
2840 | rtx_src2 = operands[2]; | |
2841 | ||
2842 | rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2); | |
2843 | DONE; | |
2844 | }) | |
2845 | ||
2846 | ;; Generate vsignedo_v2df | |
2847 | ;; signed double float to int convert odd word | |
2848 | (define_expand "vsignedo_v2df" | |
2849 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
2850 | (match_operand:V2DF 1 "register_operand" "wa"))] | |
2851 | "TARGET_VSX" | |
2852 | { | |
427a7384 | 2853 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2854 | { |
2855 | rtx rtx_tmp; | |
2856 | rtx rtx_val = GEN_INT (12); | |
2857 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2858 | ||
2859 | emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1])); | |
2860 | ||
2861 | /* Big endian word numbering for words in operand is 0 1 2 3. | |
2862 | take (operand[1] operand[1]) and shift left one word | |
2863 | 0 1 2 3 0 1 2 3 => 1 2 3 0 | |
2864 | Words 1 and 3 are now are now where they need to be for result. */ | |
2865 | ||
2866 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2867 | rtx_tmp, rtx_val)); | |
2868 | } | |
2869 | else | |
2870 | /* Little endian word numbering for operand is 3 2 1 0. | |
2871 | Result words 3 and 1 are where they need to be. */ | |
2872 | emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1])); | |
2873 | ||
2874 | DONE; | |
2875 | } | |
2876 | [(set_attr "type" "veccomplex")]) | |
2877 | ||
2878 | ;; Generate vsignede_v2df | |
2879 | ;; signed double float to int even word | |
2880 | (define_expand "vsignede_v2df" | |
2881 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2882 | (match_operand:V2DF 1 "register_operand" "v"))] | |
2883 | "TARGET_VSX" | |
2884 | { | |
427a7384 | 2885 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2886 | /* Big endian word numbering for words in operand is 0 1 |
2887 | Result words 0 is where they need to be. */ | |
2888 | emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1])); | |
2889 | ||
2890 | else | |
2891 | { | |
2892 | rtx rtx_tmp; | |
2893 | rtx rtx_val = GEN_INT (12); | |
2894 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2895 | ||
2896 | emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1])); | |
2897 | ||
2898 | /* Little endian word numbering for operand is 3 2 1 0. | |
2899 | take (operand[1] operand[1]) and shift left three words | |
2900 | 0 1 2 3 0 1 2 3 => 3 0 1 2 | |
2901 | Words 0 and 2 are now where they need to be for the result. */ | |
2902 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2903 | rtx_tmp, rtx_val)); | |
2904 | } | |
2905 | DONE; | |
2906 | } | |
2907 | [(set_attr "type" "veccomplex")]) | |
2908 | ||
2909 | ;; Generate unsigned2 | |
2910 | ;; convert two double float vectors to a vector of single precision | |
2911 | ;; unsigned ints | |
2912 | (define_expand "vunsigned2_v2df" | |
2913 | [(match_operand:V4SI 0 "register_operand" "=v") | |
2914 | (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v") | |
2915 | (match_operand:V2DF 2 "register_operand" "v")] | |
2916 | UNSPEC_VSX_VSIGNED2)] | |
2917 | "TARGET_VSX" | |
2918 | { | |
2919 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2920 | bool signed_convert=false; | |
2921 | ||
2922 | rtx_dst = operands[0]; | |
2923 | rtx_src1 = operands[1]; | |
2924 | rtx_src2 = operands[2]; | |
2925 | ||
2926 | rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2); | |
2927 | DONE; | |
2928 | }) | |
2929 | ||
2930 | ;; Generate vunsignedo_v2df | |
2931 | ;; unsigned double float to int convert odd word | |
2932 | (define_expand "vunsignedo_v2df" | |
2933 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2934 | (match_operand:V2DF 1 "register_operand" "v"))] | |
2935 | "TARGET_VSX" | |
2936 | { | |
427a7384 | 2937 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2938 | { |
2939 | rtx rtx_tmp; | |
2940 | rtx rtx_val = GEN_INT (12); | |
2941 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2942 | ||
2943 | emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1])); | |
2944 | ||
2945 | /* Big endian word numbering for words in operand is 0 1 2 3. | |
2946 | take (operand[1] operand[1]) and shift left one word | |
2947 | 0 1 2 3 0 1 2 3 => 1 2 3 0 | |
2948 | Words 1 and 3 are now are now where they need to be for result. */ | |
2949 | ||
2950 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2951 | rtx_tmp, rtx_val)); | |
2952 | } | |
2953 | else | |
2954 | /* Little endian word numbering for operand is 3 2 1 0. | |
2955 | Result words 3 and 1 are where they need to be. */ | |
2956 | emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1])); | |
2957 | ||
2958 | DONE; | |
2959 | } | |
2960 | [(set_attr "type" "veccomplex")]) | |
2961 | ||
2962 | ;; Generate vunsignede_v2df | |
2963 | ;; unsigned double float to int even word | |
2964 | (define_expand "vunsignede_v2df" | |
2965 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2966 | (match_operand:V2DF 1 "register_operand" "v"))] | |
2967 | "TARGET_VSX" | |
2968 | { | |
427a7384 | 2969 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2970 | /* Big endian word numbering for words in operand is 0 1 2 3.
2971 | Result words 0 and 2 are where they need to be. */ | |
2972 | emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1])); | |
2973 | ||
2974 | else | |
2975 | { | |
2976 | rtx rtx_tmp; | |
2977 | rtx rtx_val = GEN_INT (12); | |
2978 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2979 | ||
2980 | emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1])); | |
2981 | ||
2982 | /* Little endian word numbering for operand is 3 2 1 0. | |
2983 | take (operand[1] operand[1]) and shift left three words | |
2984 | 0 1 2 3 0 1 2 3 => 3 0 1 2 | |
2985 | Words 0 and 2 are now where they need to be for the result. */ | |
2986 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2987 | rtx_tmp, rtx_val)); | |
2988 | } | |
2989 | DONE; | |
2990 | } | |
2991 | [(set_attr "type" "veccomplex")]) | |
2992 | ||
688e4919 | 2993 | ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since |
0c307d8f | 2994 | ;; the xvrdpiz instruction does not truncate the value if the floating
688e4919 | 2995 | ;; point value is < LONG_MIN or > LONG_MAX. |
0c307d8f | 2996 | (define_insn "*vsx_float_fix_v2df2" |
85949949 | 2997 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa") |
0c307d8f MM |
2998 | (float:V2DF |
2999 | (fix:V2DI | |
85949949 | 3000 | (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))] |
2c2aa74d | 3001 | "TARGET_HARD_FLOAT |
0c307d8f | 3002 | && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations |
688e4919 | 3003 | && !flag_trapping_math && TARGET_FRIZ" |
0c307d8f | 3004 | "xvrdpiz %x0,%x1" |
2c2aa74d | 3005 | [(set_attr "type" "vecdouble")]) |
688e4919 | 3006 | |
29e6733c MM |
3007 | \f |
3008 | ;; Permute operations | |
3009 | ||
3010 | ;; Build a V2DF/V2DI vector from two scalars | |
3011 | (define_insn "vsx_concat_<mode>" | |
08c4c51e | 3012 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we") |
c6d5ff83 | 3013 | (vec_concat:VSX_D |
08c4c51e MM |
3014 | (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b") |
3015 | (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))] | |
29e6733c | 3016 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
de75c876 | 3017 | { |
e86aefb8 MM |
3018 | if (which_alternative == 0) |
3019 | return (BYTES_BIG_ENDIAN | |
3020 | ? "xxpermdi %x0,%x1,%x2,0" | |
3021 | : "xxpermdi %x0,%x2,%x1,0"); | |
3022 | ||
3023 | else if (which_alternative == 1) | |
3024 | return (BYTES_BIG_ENDIAN | |
3025 | ? "mtvsrdd %x0,%1,%2" | |
3026 | : "mtvsrdd %x0,%2,%1"); | |
3027 | ||
de75c876 | 3028 | else |
e86aefb8 | 3029 | gcc_unreachable (); |
de75c876 | 3030 | } |
b0894ae0 | 3031 | [(set_attr "type" "vecperm,vecmove")]) |
29e6733c | 3032 | |
08c4c51e MM |
3033 | ;; Combiner patterns to allow creating XXPERMDI's to access either double |
3034 | ;; word element in a vector register. | |
3035 | (define_insn "*vsx_concat_<mode>_1" | |
3036 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3037 | (vec_concat:VSX_D | |
3038 | (vec_select:<VS_scalar> | |
3039 | (match_operand:VSX_D 1 "gpc_reg_operand" "wa") | |
3040 | (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) | |
3041 | (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))] | |
3042 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3043 | { | |
3044 | HOST_WIDE_INT dword = INTVAL (operands[2]); | |
3045 | if (BYTES_BIG_ENDIAN) | |
3046 | { | |
3047 | operands[4] = GEN_INT (2*dword); | |
3048 | return "xxpermdi %x0,%x1,%x3,%4"; | |
3049 | } | |
3050 | else | |
3051 | { | |
3052 | operands[4] = GEN_INT (!dword); | |
3053 | return "xxpermdi %x0,%x3,%x1,%4"; | |
3054 | } | |
3055 | } | |
3056 | [(set_attr "type" "vecperm")]) | |
3057 | ||
3058 | (define_insn "*vsx_concat_<mode>_2" | |
3059 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3060 | (vec_concat:VSX_D | |
3061 | (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa") | |
3062 | (vec_select:<VS_scalar> | |
3063 | (match_operand:VSX_D 2 "gpc_reg_operand" "wa") | |
3064 | (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))] | |
3065 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3066 | { | |
3067 | HOST_WIDE_INT dword = INTVAL (operands[3]); | |
3068 | if (BYTES_BIG_ENDIAN) | |
3069 | { | |
3070 | operands[4] = GEN_INT (dword); | |
3071 | return "xxpermdi %x0,%x1,%x2,%4"; | |
3072 | } | |
3073 | else | |
3074 | { | |
3075 | operands[4] = GEN_INT (2 * !dword); | |
3076 | return "xxpermdi %x0,%x2,%x1,%4"; | |
3077 | } | |
3078 | } | |
3079 | [(set_attr "type" "vecperm")]) | |
3080 | ||
3081 | (define_insn "*vsx_concat_<mode>_3" | |
3082 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3083 | (vec_concat:VSX_D | |
3084 | (vec_select:<VS_scalar> | |
3085 | (match_operand:VSX_D 1 "gpc_reg_operand" "wa") | |
3086 | (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) | |
3087 | (vec_select:<VS_scalar> | |
3088 | (match_operand:VSX_D 3 "gpc_reg_operand" "wa") | |
3089 | (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))] | |
3090 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3091 | { | |
3092 | HOST_WIDE_INT dword1 = INTVAL (operands[2]); | |
3093 | HOST_WIDE_INT dword2 = INTVAL (operands[4]); | |
3094 | if (BYTES_BIG_ENDIAN) | |
3095 | { | |
3096 | operands[5] = GEN_INT ((2 * dword1) + dword2); | |
3097 | return "xxpermdi %x0,%x1,%x3,%5"; | |
3098 | } | |
3099 | else | |
3100 | { | |
3101 | operands[5] = GEN_INT ((2 * !dword2) + !dword1); | |
3102 | return "xxpermdi %x0,%x3,%x1,%5"; | |
3103 | } | |
3104 | } | |
3105 | [(set_attr "type" "vecperm")]) | |
3106 | ||
29e6733c MM |
3107 | ;; Special purpose concat using xxpermdi to glue two single precision values |
3108 | ;; together, relying on the fact that internally scalar floats are represented | |
3109 | ;; as doubles. This is used to initialize a V4SF vector with 4 floats | |
3110 | (define_insn "vsx_concat_v2sf" | |
6019c0fc | 3111 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") |
29e6733c | 3112 | (unspec:V2DF |
72e3386e SB |
3113 | [(match_operand:SF 1 "vsx_register_operand" "wa") |
3114 | (match_operand:SF 2 "vsx_register_operand" "wa")] | |
29e6733c MM |
3115 | UNSPEC_VSX_CONCAT))] |
3116 | "VECTOR_MEM_VSX_P (V2DFmode)" | |
de75c876 BS |
3117 | { |
3118 | if (BYTES_BIG_ENDIAN) | |
3119 | return "xxpermdi %x0,%x1,%x2,0"; | |
3120 | else | |
3121 | return "xxpermdi %x0,%x2,%x1,0"; | |
3122 | } | |
29e6733c MM |
3123 | [(set_attr "type" "vecperm")]) |
3124 | ||
9fede15c SB |
3125 | ;; Concatenate 4 SImode elements into a V4SImode reg. |
3126 | (define_expand "vsx_init_v4si" | |
3127 | [(use (match_operand:V4SI 0 "gpc_reg_operand")) | |
3128 | (use (match_operand:SI 1 "gpc_reg_operand")) | |
3129 | (use (match_operand:SI 2 "gpc_reg_operand")) | |
3130 | (use (match_operand:SI 3 "gpc_reg_operand")) | |
3131 | (use (match_operand:SI 4 "gpc_reg_operand"))] | |
6019c0fc | 3132 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" |
6019c0fc | 3133 | { |
f4a3cea3 KL |
3134 | rtx a = gen_lowpart_SUBREG (DImode, operands[1]); |
3135 | rtx b = gen_lowpart_SUBREG (DImode, operands[2]); | |
3136 | rtx c = gen_lowpart_SUBREG (DImode, operands[3]); | |
3137 | rtx d = gen_lowpart_SUBREG (DImode, operands[4]); | |
9fede15c SB |
3138 | if (!BYTES_BIG_ENDIAN) |
3139 | { | |
3140 | std::swap (a, b); | |
3141 | std::swap (c, d); | |
3142 | } | |
3143 | ||
9fede15c | 3144 | rtx ab = gen_reg_rtx (DImode); |
9fede15c | 3145 | rtx cd = gen_reg_rtx (DImode); |
f4a3cea3 KL |
3146 | emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b, |
3147 | GEN_INT (0xffffffff))); | |
3148 | emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d, | |
3149 | GEN_INT (0xffffffff))); | |
9fede15c SB |
3150 | |
3151 | rtx abcd = gen_reg_rtx (V2DImode); | |
3152 | emit_insn (gen_vsx_concat_v2di (abcd, ab, cd)); | |
3153 | emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd)); | |
6019c0fc MM |
3154 | DONE; |
3155 | }) | |
3156 | ||
0cf68694 BS |
3157 | ;; xxpermdi for little endian loads and stores. We need several of |
3158 | ;; these since the form of the PARALLEL differs by mode. | |
3159 | (define_insn "*vsx_xxpermdi2_le_<mode>" | |
012f609e | 3160 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
6579b156 | 3161 | (vec_select:VSX_D |
012f609e | 3162 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") |
0cf68694 BS |
3163 | (parallel [(const_int 1) (const_int 0)])))] |
3164 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
3165 | "xxpermdi %x0,%x1,%x1,2" | |
3166 | [(set_attr "type" "vecperm")]) | |
3167 | ||
a8cea25c CL |
3168 | (define_insn "xxswapd_v16qi" |
3169 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
3170 | (vec_select:V16QI | |
3171 | (match_operand:V16QI 1 "vsx_register_operand" "wa") | |
3172 | (parallel [(const_int 8) (const_int 9) | |
3173 | (const_int 10) (const_int 11) | |
3174 | (const_int 12) (const_int 13) | |
3175 | (const_int 14) (const_int 15) | |
3176 | (const_int 0) (const_int 1) | |
3177 | (const_int 2) (const_int 3) | |
3178 | (const_int 4) (const_int 5) | |
3179 | (const_int 6) (const_int 7)])))] | |
3180 | "TARGET_VSX" | |
3181 | ;; AIX does not support the extended mnemonic xxswapd. Use the basic | |
3182 | ;; mnemonic xxpermdi instead. | |
0cf68694 BS |
3183 | "xxpermdi %x0,%x1,%x1,2" |
3184 | [(set_attr "type" "vecperm")]) | |
3185 | ||
a8cea25c | 3186 | (define_insn "xxswapd_v8hi" |
0cf68694 | 3187 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") |
a8cea25c CL |
3188 | (vec_select:V8HI |
3189 | (match_operand:V8HI 1 "vsx_register_operand" "wa") | |
3190 | (parallel [(const_int 4) (const_int 5) | |
3191 | (const_int 6) (const_int 7) | |
3192 | (const_int 0) (const_int 1) | |
3193 | (const_int 2) (const_int 3)])))] | |
3194 | "TARGET_VSX" | |
3195 | ;; AIX does not support the extended mnemonic xxswapd. Use the basic | |
3196 | ;; mnemonic xxpermdi instead. | |
0cf68694 BS |
3197 | "xxpermdi %x0,%x1,%x1,2" |
3198 | [(set_attr "type" "vecperm")]) | |
3199 | ||
a8cea25c CL |
3200 | (define_insn "xxswapd_<mode>" |
3201 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") | |
3202 | (vec_select:VSX_W | |
3203 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") | |
3204 | (parallel [(const_int 2) (const_int 3) | |
3205 | (const_int 0) (const_int 1)])))] | |
3206 | "TARGET_VSX" | |
3207 | ;; AIX does not support extended mnemonic xxswapd. Use the basic | |
3208 | ;; mnemonic xxpermdi instead. | |
0cf68694 BS |
3209 | "xxpermdi %x0,%x1,%x1,2" |
3210 | [(set_attr "type" "vecperm")]) | |
3211 | ||
7b88f66d KN |
3212 | (define_insn "xxswapd_<mode>" |
3213 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3214 | (vec_select:VSX_D | |
3215 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") | |
3216 | (parallel [(const_int 1) (const_int 0)])))] | |
3217 | "TARGET_VSX" | |
3218 | ;; AIX does not support extended mnemonic xxswapd. Use the basic | |
3219 | ;; mnemonic xxpermdi instead. | |
3220 | "xxpermdi %x0,%x1,%x1,2" | |
3221 | [(set_attr "type" "vecperm")]) | |
3222 | ||
f03122f2 CL |
3223 | ;; Swap upper/lower 64-bit values in a 128-bit vector |
3224 | (define_insn "xxswapd_v1ti" | |
3225 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
3226 | (subreg:V1TI | |
3227 | (vec_select:V2DI | |
3228 | (subreg:V2DI | |
3229 | (match_operand:V1TI 1 "vsx_register_operand" "v") 0 ) | |
3230 | (parallel [(const_int 1)(const_int 0)])) | |
3231 | 0))] | |
3232 | "TARGET_POWER10" | |
3233 | ;; AIX does not support extended mnemonic xxswapd. Use the basic | |
3234 | ;; mnemonic xxpermdi instead. | |
3235 | "xxpermdi %x0,%x1,%x1,2" | |
3236 | [(set_attr "type" "vecperm")]) | |
3237 | ||
b8eaa754 CL |
3238 | (define_insn "xxgenpcvm_<mode>_internal" |
3239 | [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa") | |
3240 | (unspec:VSX_EXTRACT_I4 | |
3241 | [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v") | |
3242 | (match_operand:QI 2 "const_0_to_3_operand" "n")] | |
3243 | UNSPEC_XXGENPCV))] | |
a3c13696 | 3244 | "TARGET_POWER10" |
b8eaa754 CL |
3245 | "xxgenpcv<wd>m %x0,%1,%2" |
3246 | [(set_attr "type" "vecsimple")]) | |
3247 | ||
3248 | (define_expand "xxgenpcvm_<mode>" | |
3249 | [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand")) | |
3250 | (use (match_operand:VSX_EXTRACT_I4 1 "register_operand")) | |
3251 | (use (match_operand:QI 2 "immediate_operand"))] | |
5d9d0c94 | 3252 | "TARGET_POWER10" |
b8eaa754 CL |
3253 | { |
3254 | if (!BYTES_BIG_ENDIAN) | |
3255 | { | |
3256 | /* gen_xxgenpcvm assumes Big Endian order. If LE, | |
3257 | swap the upper and lower double words. */ | |
3258 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3259 | ||
3260 | emit_insn (gen_xxswapd_<mode> (tmp, operands[1])); | |
3261 | operands[1] = tmp; | |
3262 | } | |
3263 | emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1], | |
3264 | operands[2])); | |
3265 | DONE; | |
3266 | }) | |
3267 | ||
0cf68694 BS |
3268 | ;; lxvd2x for little endian loads. We need several of |
3269 | ;; these since the form of the PARALLEL differs by mode. | |
3270 | (define_insn "*vsx_lxvd2x2_le_<mode>" | |
012f609e | 3271 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
6579b156 BS |
3272 | (vec_select:VSX_D |
3273 | (match_operand:VSX_D 1 "memory_operand" "Z") | |
0cf68694 | 3274 | (parallel [(const_int 1) (const_int 0)])))] |
5d57fdc1 | 3275 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3276 | "lxvd2x %x0,%y1" |
3277 | [(set_attr "type" "vecload")]) | |
3278 | ||
3279 | (define_insn "*vsx_lxvd2x4_le_<mode>" | |
7858932e | 3280 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
0cf68694 BS |
3281 | (vec_select:VSX_W |
3282 | (match_operand:VSX_W 1 "memory_operand" "Z") | |
3283 | (parallel [(const_int 2) (const_int 3) | |
3284 | (const_int 0) (const_int 1)])))] | |
5d57fdc1 | 3285 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3286 | "lxvd2x %x0,%y1" |
3287 | [(set_attr "type" "vecload")]) | |
3288 | ||
3289 | (define_insn "*vsx_lxvd2x8_le_V8HI" | |
3290 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
3291 | (vec_select:V8HI | |
3292 | (match_operand:V8HI 1 "memory_operand" "Z") | |
3293 | (parallel [(const_int 4) (const_int 5) | |
3294 | (const_int 6) (const_int 7) | |
3295 | (const_int 0) (const_int 1) | |
3296 | (const_int 2) (const_int 3)])))] | |
5d57fdc1 | 3297 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3298 | "lxvd2x %x0,%y1" |
3299 | [(set_attr "type" "vecload")]) | |
3300 | ||
3301 | (define_insn "*vsx_lxvd2x16_le_V16QI" | |
3302 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
3303 | (vec_select:V16QI | |
3304 | (match_operand:V16QI 1 "memory_operand" "Z") | |
3305 | (parallel [(const_int 8) (const_int 9) | |
3306 | (const_int 10) (const_int 11) | |
3307 | (const_int 12) (const_int 13) | |
3308 | (const_int 14) (const_int 15) | |
3309 | (const_int 0) (const_int 1) | |
3310 | (const_int 2) (const_int 3) | |
3311 | (const_int 4) (const_int 5) | |
3312 | (const_int 6) (const_int 7)])))] | |
5d57fdc1 | 3313 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3314 | "lxvd2x %x0,%y1" |
3315 | [(set_attr "type" "vecload")]) | |
3316 | ||
3317 | ;; stxvd2x for little endian stores. We need several of | |
3318 | ;; these since the form of the PARALLEL differs by mode. | |
3319 | (define_insn "*vsx_stxvd2x2_le_<mode>" | |
6579b156 BS |
3320 | [(set (match_operand:VSX_D 0 "memory_operand" "=Z") |
3321 | (vec_select:VSX_D | |
012f609e | 3322 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") |
0cf68694 | 3323 | (parallel [(const_int 1) (const_int 0)])))] |
5d57fdc1 | 3324 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3325 | "stxvd2x %x1,%y0" |
3326 | [(set_attr "type" "vecstore")]) | |
3327 | ||
3328 | (define_insn "*vsx_stxvd2x4_le_<mode>" | |
3329 | [(set (match_operand:VSX_W 0 "memory_operand" "=Z") | |
3330 | (vec_select:VSX_W | |
7858932e | 3331 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
0cf68694 BS |
3332 | (parallel [(const_int 2) (const_int 3) |
3333 | (const_int 0) (const_int 1)])))] | |
5d57fdc1 | 3334 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3335 | "stxvd2x %x1,%y0" |
3336 | [(set_attr "type" "vecstore")]) | |
3337 | ||
3338 | (define_insn "*vsx_stxvd2x8_le_V8HI" | |
3339 | [(set (match_operand:V8HI 0 "memory_operand" "=Z") | |
3340 | (vec_select:V8HI | |
3341 | (match_operand:V8HI 1 "vsx_register_operand" "wa") | |
3342 | (parallel [(const_int 4) (const_int 5) | |
3343 | (const_int 6) (const_int 7) | |
3344 | (const_int 0) (const_int 1) | |
3345 | (const_int 2) (const_int 3)])))] | |
5d57fdc1 | 3346 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3347 | "stxvd2x %x1,%y0" |
3348 | [(set_attr "type" "vecstore")]) | |
3349 | ||
3350 | (define_insn "*vsx_stxvd2x16_le_V16QI" | |
3351 | [(set (match_operand:V16QI 0 "memory_operand" "=Z") | |
3352 | (vec_select:V16QI | |
3353 | (match_operand:V16QI 1 "vsx_register_operand" "wa") | |
3354 | (parallel [(const_int 8) (const_int 9) | |
3355 | (const_int 10) (const_int 11) | |
3356 | (const_int 12) (const_int 13) | |
3357 | (const_int 14) (const_int 15) | |
3358 | (const_int 0) (const_int 1) | |
3359 | (const_int 2) (const_int 3) | |
3360 | (const_int 4) (const_int 5) | |
3361 | (const_int 6) (const_int 7)])))] | |
5d57fdc1 | 3362 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3363 | "stxvd2x %x1,%y0" |
3364 | [(set_attr "type" "vecstore")]) | |
3365 | ||
a16a872d MM |
3366 | ;; Convert a TImode value into V1TImode |
3367 | (define_expand "vsx_set_v1ti" | |
ad18eed2 SB |
3368 | [(match_operand:V1TI 0 "nonimmediate_operand") |
3369 | (match_operand:V1TI 1 "nonimmediate_operand") | |
3370 | (match_operand:TI 2 "input_operand") | |
3371 | (match_operand:QI 3 "u5bit_cint_operand")] | |
a16a872d MM |
3372 | "VECTOR_MEM_VSX_P (V1TImode)" |
3373 | { | |
3374 | if (operands[3] != const0_rtx) | |
3375 | gcc_unreachable (); | |
3376 | ||
3377 | emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1])); | |
3378 | DONE; | |
3379 | }) | |
3380 | ||
08c4c51e MM |
3381 | ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT |
3382 | (define_expand "vsx_set_<mode>" | |
3383 | [(use (match_operand:VSX_D 0 "vsx_register_operand")) | |
3384 | (use (match_operand:VSX_D 1 "vsx_register_operand")) | |
3385 | (use (match_operand:<VS_scalar> 2 "gpc_reg_operand")) | |
3386 | (use (match_operand:QI 3 "const_0_to_1_operand"))] | |
29e6733c MM |
3387 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
3388 | { | |
08c4c51e MM |
3389 | rtx dest = operands[0]; |
3390 | rtx vec_reg = operands[1]; | |
3391 | rtx value = operands[2]; | |
3392 | rtx ele = operands[3]; | |
3393 | rtx tmp = gen_reg_rtx (<VS_scalar>mode); | |
3394 | ||
3395 | if (ele == const0_rtx) | |
3396 | { | |
3397 | emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx)); | |
3398 | emit_insn (gen_vsx_concat_<mode> (dest, value, tmp)); | |
3399 | DONE; | |
3400 | } | |
3401 | else if (ele == const1_rtx) | |
3402 | { | |
3403 | emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx)); | |
3404 | emit_insn (gen_vsx_concat_<mode> (dest, tmp, value)); | |
3405 | DONE; | |
3406 | } | |
29e6733c MM |
3407 | else |
3408 | gcc_unreachable (); | |
08c4c51e | 3409 | }) |
29e6733c MM |
3410 | |
3411 | ;; Extract a DF/DI element from V2DF/V2DI | |
117f16fb MM |
3412 | ;; Optimize cases were we can do a simple or direct move. |
3413 | ;; Or see if we can avoid doing the move at all | |
1a3c3ee9 MM |
3414 | |
3415 | ;; There are some unresolved problems with reload that show up if an Altivec | |
3416 | ;; register was picked. Limit the scalar value to FPRs for now. | |
3417 | ||
3418 | (define_insn "vsx_extract_<mode>" | |
012f609e | 3419 | [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr") |
117f16fb | 3420 | (vec_select:<VS_scalar> |
012f609e | 3421 | (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa") |
117f16fb | 3422 | (parallel |
012f609e | 3423 | [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))] |
1a3c3ee9 | 3424 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
117f16fb | 3425 | { |
1a3c3ee9 | 3426 | int element = INTVAL (operands[2]); |
117f16fb MM |
3427 | int op0_regno = REGNO (operands[0]); |
3428 | int op1_regno = REGNO (operands[1]); | |
1a3c3ee9 | 3429 | int fldDM; |
117f16fb | 3430 | |
1a3c3ee9 MM |
3431 | gcc_assert (IN_RANGE (element, 0, 1)); |
3432 | gcc_assert (VSX_REGNO_P (op1_regno)); | |
117f16fb | 3433 | |
1a3c3ee9 MM |
3434 | if (element == VECTOR_ELEMENT_SCALAR_64BIT) |
3435 | { | |
3436 | if (op0_regno == op1_regno) | |
3437 | return ASM_COMMENT_START " vec_extract to same register"; | |
117f16fb | 3438 | |
1a3c3ee9 MM |
3439 | else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE |
3440 | && TARGET_POWERPC64) | |
3441 | return "mfvsrd %0,%x1"; | |
117f16fb | 3442 | |
1a3c3ee9 MM |
3443 | else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno)) |
3444 | return "fmr %0,%1"; | |
117f16fb | 3445 | |
1a3c3ee9 MM |
3446 | else if (VSX_REGNO_P (op0_regno)) |
3447 | return "xxlor %x0,%x1,%x1"; | |
117f16fb | 3448 | |
1a3c3ee9 MM |
3449 | else |
3450 | gcc_unreachable (); | |
3451 | } | |
117f16fb | 3452 | |
1a3c3ee9 MM |
3453 | else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno) |
3454 | && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE) | |
98060bbe | 3455 | return "mfvsrld %0,%x1"; |
117f16fb | 3456 | |
1a3c3ee9 MM |
3457 | else if (VSX_REGNO_P (op0_regno)) |
3458 | { | |
3459 | fldDM = element << 1; | |
3460 | if (!BYTES_BIG_ENDIAN) | |
3461 | fldDM = 3 - fldDM; | |
3462 | operands[3] = GEN_INT (fldDM); | |
3463 | return "xxpermdi %x0,%x1,%x1,%3"; | |
117f16fb MM |
3464 | } |
3465 | ||
1a3c3ee9 MM |
3466 | else |
3467 | gcc_unreachable (); | |
29e6733c | 3468 | } |
863e8d53 | 3469 | [(set_attr "type" "veclogical,mfvsr,mfvsr,vecperm") |
66b54d88 | 3470 | (set_attr "isa" "*,*,p8v,p9v")]) |
29e6733c | 3471 | |
d0047a25 MM |
3472 | ;; Optimize extracting a single scalar element from memory. |
3473 | (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load" | |
11d7bd36 | 3474 | [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr") |
d0047a25 MM |
3475 | (vec_select:<VSX_D:VS_scalar> |
3476 | (match_operand:VSX_D 1 "memory_operand" "m,m") | |
3477 | (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")]))) | |
3478 | (clobber (match_scratch:P 3 "=&b,&b"))] | |
dc355223 | 3479 | "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)" |
d0047a25 MM |
3480 | "#" |
3481 | "&& reload_completed" | |
3482 | [(set (match_dup 0) (match_dup 4))] | |
3483 | { | |
3484 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
3485 | operands[3], <VSX_D:VS_scalar>mode); | |
3486 | } | |
3487 | [(set_attr "type" "fpload,load") | |
3488 | (set_attr "length" "8")]) | |
117f16fb MM |
3489 | |
3490 | ;; Optimize storing a single scalar element that is the right location to | |
3491 | ;; memory | |
3492 | (define_insn "*vsx_extract_<mode>_store" | |
b5aa1281 | 3493 | [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY") |
27b097f8 | 3494 | (vec_select:<VS_scalar> |
208a0405 | 3495 | (match_operand:VSX_D 1 "register_operand" "d,v,v") |
117f16fb MM |
3496 | (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))] |
3497 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3498 | "@ | |
3499 | stfd%U0%X0 %1,%0 | |
158985b1 | 3500 | stxsdx %x1,%y0 |
d0047a25 | 3501 | stxsd %1,%0" |
d17fbef8 | 3502 | [(set_attr "type" "fpstore") |
208a0405 | 3503 | (set_attr "isa" "*,p7v,p9v")]) |
27b097f8 | 3504 | |
e0d32185 MM |
3505 | ;; Variable V2DI/V2DF extract shift |
3506 | (define_insn "vsx_vslo_<mode>" | |
3507 | [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v") | |
3508 | (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v") | |
3509 | (match_operand:V2DI 2 "gpc_reg_operand" "v")] | |
3510 | UNSPEC_VSX_VSLO))] | |
3511 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
3512 | "vslo %0,%1,%2" | |
3513 | [(set_attr "type" "vecperm")]) | |
3514 | ||
75c299ac | 3515 | ;; Variable V2DI/V2DF extract from a register |
e0d32185 | 3516 | (define_insn_and_split "vsx_extract_<mode>_var" |
75c299ac MM |
3517 | [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v") |
3518 | (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v") | |
3519 | (match_operand:DI 2 "gpc_reg_operand" "r")] | |
e0d32185 | 3520 | UNSPEC_VSX_EXTRACT)) |
75c299ac MM |
3521 | (clobber (match_scratch:DI 3 "=r")) |
3522 | (clobber (match_scratch:V2DI 4 "=&v"))] | |
e0d32185 MM |
3523 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" |
3524 | "#" | |
3525 | "&& reload_completed" | |
3526 | [(const_int 0)] | |
3527 | { | |
3528 | rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], | |
3529 | operands[3], operands[4]); | |
3530 | DONE; | |
3531 | }) | |
3532 | ||
75c299ac MM |
3533 | ;; Variable V2DI/V2DF extract from memory |
3534 | (define_insn_and_split "*vsx_extract_<mode>_var_load" | |
3535 | [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r") | |
3536 | (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q") | |
3537 | (match_operand:DI 2 "gpc_reg_operand" "r,r")] | |
3538 | UNSPEC_VSX_EXTRACT)) | |
3539 | (clobber (match_scratch:DI 3 "=&b,&b"))] | |
3540 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
3541 | "#" | |
3542 | "&& reload_completed" | |
3543 | [(set (match_dup 0) (match_dup 4))] | |
3544 | { | |
3545 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
3546 | operands[3], <VS_scalar>mode); | |
3547 | } | |
3548 | [(set_attr "type" "fpload,load")]) | |
3549 | ||
df10b6d4 MM |
3550 | ;; Extract a SF element from V4SF |
3551 | (define_insn_and_split "vsx_extract_v4sf" | |
72e3386e | 3552 | [(set (match_operand:SF 0 "vsx_register_operand" "=wa") |
df10b6d4 | 3553 | (vec_select:SF |
e2a99194 MM |
3554 | (match_operand:V4SF 1 "vsx_register_operand" "wa") |
3555 | (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")]))) | |
3556 | (clobber (match_scratch:V4SF 3 "=0"))] | |
df10b6d4 | 3557 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
e2a99194 MM |
3558 | "#" |
3559 | "&& 1" | |
df10b6d4 | 3560 | [(const_int 0)] |
df10b6d4 MM |
3561 | { |
3562 | rtx op0 = operands[0]; | |
3563 | rtx op1 = operands[1]; | |
3564 | rtx op2 = operands[2]; | |
3565 | rtx op3 = operands[3]; | |
3566 | rtx tmp; | |
27b097f8 | 3567 | HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2); |
df10b6d4 MM |
3568 | |
3569 | if (ele == 0) | |
3570 | tmp = op1; | |
3571 | else | |
3572 | { | |
3573 | if (GET_CODE (op3) == SCRATCH) | |
3574 | op3 = gen_reg_rtx (V4SFmode); | |
6a742466 | 3575 | emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele))); |
df10b6d4 MM |
3576 | tmp = op3; |
3577 | } | |
3578 | emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp)); | |
3579 | DONE; | |
e2a99194 MM |
3580 | } |
3581 | [(set_attr "length" "8") | |
df10b6d4 MM |
3582 | (set_attr "type" "fp")]) |
3583 | ||
e2a99194 | 3584 | (define_insn_and_split "*vsx_extract_v4sf_<mode>_load" |
208a0405 | 3585 | [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r") |
e2a99194 MM |
3586 | (vec_select:SF |
3587 | (match_operand:V4SF 1 "memory_operand" "m,Z,m,m") | |
3588 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) | |
3589 | (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))] | |
3590 | "VECTOR_MEM_VSX_P (V4SFmode)" | |
3591 | "#" | |
3592 | "&& reload_completed" | |
3593 | [(set (match_dup 0) (match_dup 4))] | |
3594 | { | |
3595 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
3596 | operands[3], SFmode); | |
3597 | } | |
3598 | [(set_attr "type" "fpload,fpload,fpload,load") | |
d17fbef8 | 3599 | (set_attr "length" "8") |
208a0405 | 3600 | (set_attr "isa" "*,p7v,p9v,*")]) |
e2a99194 | 3601 | |
75c299ac | 3602 | ;; Variable V4SF extract from a register |
e2a99194 | 3603 | (define_insn_and_split "vsx_extract_v4sf_var" |
75c299ac MM |
3604 | [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") |
3605 | (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v") | |
3606 | (match_operand:DI 2 "gpc_reg_operand" "r")] | |
e2a99194 | 3607 | UNSPEC_VSX_EXTRACT)) |
75c299ac MM |
3608 | (clobber (match_scratch:DI 3 "=r")) |
3609 | (clobber (match_scratch:V2DI 4 "=&v"))] | |
19970253 | 3610 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" |
e2a99194 MM |
3611 | "#" |
3612 | "&& reload_completed" | |
3613 | [(const_int 0)] | |
3614 | { | |
3615 | rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], | |
3616 | operands[3], operands[4]); | |
3617 | DONE; | |
3618 | }) | |
3619 | ||
75c299ac MM |
3620 | ;; Variable V4SF extract from memory |
3621 | (define_insn_and_split "*vsx_extract_v4sf_var_load" | |
3622 | [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r") | |
3623 | (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q") | |
3624 | (match_operand:DI 2 "gpc_reg_operand" "r,r")] | |
3625 | UNSPEC_VSX_EXTRACT)) | |
3626 | (clobber (match_scratch:DI 3 "=&b,&b"))] | |
3627 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" | |
3628 | "#" | |
3629 | "&& reload_completed" | |
3630 | [(set (match_dup 0) (match_dup 4))] | |
3631 | { | |
3632 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
3633 | operands[3], SFmode); | |
3634 | } | |
3635 | [(set_attr "type" "fpload,load")]) | |
3636 | ||
5aebfdad RH |
3637 | ;; Expand the builtin form of xxpermdi to canonical rtl. |
3638 | (define_expand "vsx_xxpermdi_<mode>" | |
a530e181 BS |
3639 | [(match_operand:VSX_L 0 "vsx_register_operand") |
3640 | (match_operand:VSX_L 1 "vsx_register_operand") | |
3641 | (match_operand:VSX_L 2 "vsx_register_operand") | |
3642 | (match_operand:QI 3 "u5bit_cint_operand")] | |
3643 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3644 | { | |
3645 | rtx target = operands[0]; | |
3646 | rtx op0 = operands[1]; | |
3647 | rtx op1 = operands[2]; | |
3648 | int mask = INTVAL (operands[3]); | |
3649 | rtx perm0 = GEN_INT ((mask >> 1) & 1); | |
3650 | rtx perm1 = GEN_INT ((mask & 1) + 2); | |
3651 | rtx (*gen) (rtx, rtx, rtx, rtx, rtx); | |
3652 | ||
3653 | if (<MODE>mode == V2DFmode) | |
3654 | gen = gen_vsx_xxpermdi2_v2df_1; | |
3655 | else | |
3656 | { | |
3657 | gen = gen_vsx_xxpermdi2_v2di_1; | |
3658 | if (<MODE>mode != V2DImode) | |
3659 | { | |
3660 | target = gen_lowpart (V2DImode, target); | |
3661 | op0 = gen_lowpart (V2DImode, op0); | |
3662 | op1 = gen_lowpart (V2DImode, op1); | |
3663 | } | |
3664 | } | |
3665 | emit_insn (gen (target, op0, op1, perm0, perm1)); | |
3666 | DONE; | |
3667 | }) | |
3668 | ||
3669 | ;; Special version of xxpermdi that retains big-endian semantics. | |
3670 | (define_expand "vsx_xxpermdi_<mode>_be" | |
3671 | [(match_operand:VSX_L 0 "vsx_register_operand") | |
3672 | (match_operand:VSX_L 1 "vsx_register_operand") | |
3673 | (match_operand:VSX_L 2 "vsx_register_operand") | |
3674 | (match_operand:QI 3 "u5bit_cint_operand")] | |
29e6733c | 3675 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
5aebfdad RH |
3676 | { |
3677 | rtx target = operands[0]; | |
3678 | rtx op0 = operands[1]; | |
3679 | rtx op1 = operands[2]; | |
3680 | int mask = INTVAL (operands[3]); | |
3681 | rtx perm0 = GEN_INT ((mask >> 1) & 1); | |
3682 | rtx perm1 = GEN_INT ((mask & 1) + 2); | |
3683 | rtx (*gen) (rtx, rtx, rtx, rtx, rtx); | |
3684 | ||
3685 | if (<MODE>mode == V2DFmode) | |
3686 | gen = gen_vsx_xxpermdi2_v2df_1; | |
3687 | else | |
3688 | { | |
3689 | gen = gen_vsx_xxpermdi2_v2di_1; | |
3690 | if (<MODE>mode != V2DImode) | |
3691 | { | |
3692 | target = gen_lowpart (V2DImode, target); | |
c6d5ff83 MM |
3693 | op0 = gen_lowpart (V2DImode, op0); |
3694 | op1 = gen_lowpart (V2DImode, op1); | |
5aebfdad RH |
3695 | } |
3696 | } | |
54c4bfd7 BS |
3697 | /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a |
3698 | transformation we don't want; it is necessary for | |
3699 | rs6000_expand_vec_perm_const_1 but not for this use. So we | |
3700 | prepare for that by reversing the transformation here. */ | |
3701 | if (BYTES_BIG_ENDIAN) | |
3702 | emit_insn (gen (target, op0, op1, perm0, perm1)); | |
3703 | else | |
3704 | { | |
3705 | rtx p0 = GEN_INT (3 - INTVAL (perm1)); | |
3706 | rtx p1 = GEN_INT (3 - INTVAL (perm0)); | |
3707 | emit_insn (gen (target, op1, op0, p0, p1)); | |
3708 | } | |
5aebfdad RH |
3709 | DONE; |
3710 | }) | |
29e6733c | 3711 | |
5aebfdad | 3712 | (define_insn "vsx_xxpermdi2_<mode>_1" |
85949949 | 3713 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
5aebfdad RH |
3714 | (vec_select:VSX_D |
3715 | (vec_concat:<VS_double> | |
85949949 SB |
3716 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") |
3717 | (match_operand:VSX_D 2 "vsx_register_operand" "wa")) | |
5aebfdad RH |
3718 | (parallel [(match_operand 3 "const_0_to_1_operand" "") |
3719 | (match_operand 4 "const_2_to_3_operand" "")])))] | |
29e6733c MM |
3720 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
3721 | { | |
8adcc78b BS |
3722 | int op3, op4, mask; |
3723 | ||
3724 | /* For little endian, swap operands and invert/swap selectors | |
3725 | to get the correct xxpermdi. The operand swap sets up the | |
3726 | inputs as a little endian array. The selectors are swapped | |
3727 | because they are defined to use big endian ordering. The | |
3728 | selectors are inverted to get the correct doublewords for | |
3729 | little endian ordering. */ | |
3730 | if (BYTES_BIG_ENDIAN) | |
3731 | { | |
3732 | op3 = INTVAL (operands[3]); | |
3733 | op4 = INTVAL (operands[4]); | |
3734 | } | |
3735 | else | |
3736 | { | |
3737 | op3 = 3 - INTVAL (operands[4]); | |
3738 | op4 = 3 - INTVAL (operands[3]); | |
3739 | } | |
3740 | ||
3741 | mask = (op3 << 1) | (op4 - 2); | |
5aebfdad | 3742 | operands[3] = GEN_INT (mask); |
8adcc78b BS |
3743 | |
3744 | if (BYTES_BIG_ENDIAN) | |
3745 | return "xxpermdi %x0,%x1,%x2,%3"; | |
3746 | else | |
3747 | return "xxpermdi %x0,%x2,%x1,%3"; | |
29e6733c MM |
3748 | } |
3749 | [(set_attr "type" "vecperm")]) | |
3750 | ||
bcb9a772 MM |
3751 | ;; Extraction of a single element in a small integer vector. Until ISA 3.0, |
3752 | ;; none of the small types were allowed in a vector register, so we had to | |
3753 | ;; extract to a DImode and either do a direct move or store. | |
e2a99194 | 3754 | (define_expand "vsx_extract_<mode>" |
456f0dfa | 3755 | [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand") |
e2a99194 MM |
3756 | (vec_select:<VS_scalar> |
3757 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") | |
3758 | (parallel [(match_operand:QI 2 "const_int_operand")]))) | |
456f0dfa | 3759 | (clobber (match_scratch:VSX_EXTRACT_I 3))])] |
e2a99194 MM |
3760 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" |
3761 | { | |
456f0dfa | 3762 | /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ |
9bfda664 | 3763 | if (TARGET_P9_VECTOR) |
c5e74d9d | 3764 | { |
456f0dfa MM |
3765 | emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1], |
3766 | operands[2])); | |
3767 | DONE; | |
c5e74d9d | 3768 | } |
456f0dfa | 3769 | }) |
c5e74d9d | 3770 | |
456f0dfa | 3771 | (define_insn "vsx_extract_<mode>_p9" |
16370e79 | 3772 | [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>") |
456f0dfa | 3773 | (vec_select:<VS_scalar> |
791e7779 | 3774 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>") |
16370e79 MM |
3775 | (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) |
3776 | (clobber (match_scratch:SI 3 "=r,X"))] | |
9bfda664 | 3777 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" |
c5e74d9d | 3778 | { |
16370e79 MM |
3779 | if (which_alternative == 0) |
3780 | return "#"; | |
c5e74d9d | 3781 | |
c5e74d9d | 3782 | else |
16370e79 MM |
3783 | { |
3784 | HOST_WIDE_INT elt = INTVAL (operands[2]); | |
427a7384 | 3785 | HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN |
16370e79 MM |
3786 | ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt |
3787 | : elt); | |
3788 | ||
3789 | HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); | |
3790 | HOST_WIDE_INT offset = unit_size * elt_adj; | |
3791 | ||
3792 | operands[2] = GEN_INT (offset); | |
3793 | if (unit_size == 4) | |
3794 | return "xxextractuw %x0,%x1,%2"; | |
3795 | else | |
3796 | return "vextractu<wd> %0,%1,%2"; | |
3797 | } | |
c5e74d9d | 3798 | } |
0c8ac746 SB |
3799 | [(set_attr "type" "vecsimple") |
3800 | (set_attr "isa" "p9v,*")]) | |
c5e74d9d | 3801 | |
16370e79 MM |
3802 | (define_split |
3803 | [(set (match_operand:<VS_scalar> 0 "int_reg_operand") | |
3804 | (vec_select:<VS_scalar> | |
3805 | (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") | |
3806 | (parallel [(match_operand:QI 2 "const_int_operand")]))) | |
3807 | (clobber (match_operand:SI 3 "int_reg_operand"))] | |
9bfda664 | 3808 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed" |
16370e79 MM |
3809 | [(const_int 0)] |
3810 | { | |
3811 | rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); | |
3812 | rtx op1 = operands[1]; | |
3813 | rtx op2 = operands[2]; | |
3814 | rtx op3 = operands[3]; | |
3815 | HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode); | |
3816 | ||
3817 | emit_move_insn (op3, GEN_INT (offset)); | |
427a7384 | 3818 | if (BYTES_BIG_ENDIAN) |
16370e79 MM |
3819 | emit_insn (gen_vextu<wd>lx (op0_si, op3, op1)); |
3820 | else | |
3821 | emit_insn (gen_vextu<wd>rx (op0_si, op3, op1)); | |
3822 | DONE; | |
3823 | }) | |
3824 | ||
456f0dfa MM |
3825 | ;; Optimize zero extracts to eliminate the AND after the extract. |
3826 | (define_insn_and_split "*vsx_extract_<mode>_di_p9" | |
16370e79 | 3827 | [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>") |
456f0dfa MM |
3828 | (zero_extend:DI |
3829 | (vec_select:<VS_scalar> | |
791e7779 | 3830 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>") |
16370e79 MM |
3831 | (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) |
3832 | (clobber (match_scratch:SI 3 "=r,X"))] | |
9bfda664 | 3833 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" |
456f0dfa MM |
3834 | "#" |
3835 | "&& reload_completed" | |
16370e79 MM |
3836 | [(parallel [(set (match_dup 4) |
3837 | (vec_select:<VS_scalar> | |
3838 | (match_dup 1) | |
3839 | (parallel [(match_dup 2)]))) | |
3840 | (clobber (match_dup 3))])] | |
456f0dfa | 3841 | { |
16370e79 | 3842 | operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0])); |
0c8ac746 SB |
3843 | } |
3844 | [(set_attr "isa" "p9v,*")]) | |
456f0dfa MM |
3845 | |
3846 | ;; Optimize stores to use the ISA 3.0 scalar store instructions | |
3847 | (define_insn_and_split "*vsx_extract_<mode>_store_p9" | |
16370e79 | 3848 | [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m") |
456f0dfa | 3849 | (vec_select:<VS_scalar> |
60fb638f | 3850 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v") |
16370e79 | 3851 | (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) |
1c689b82 | 3852 | (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&*r")) |
16370e79 | 3853 | (clobber (match_scratch:SI 4 "=X,&r"))] |
9bfda664 | 3854 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" |
456f0dfa MM |
3855 | "#" |
3856 | "&& reload_completed" | |
16370e79 MM |
3857 | [(parallel [(set (match_dup 3) |
3858 | (vec_select:<VS_scalar> | |
3859 | (match_dup 1) | |
3860 | (parallel [(match_dup 2)]))) | |
3861 | (clobber (match_dup 4))]) | |
456f0dfa MM |
3862 | (set (match_dup 0) |
3863 | (match_dup 3))]) | |
3864 | ||
787c7a65 | 3865 | (define_insn_and_split "*vsx_extract_si" |
791e7779 | 3866 | [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z") |
787c7a65 | 3867 | (vec_select:SI |
0c8ac746 | 3868 | (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v") |
156b5cca | 3869 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) |
0c8ac746 | 3870 | (clobber (match_scratch:V4SI 3 "=v,v,v"))] |
9bfda664 | 3871 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR" |
787c7a65 MM |
3872 | "#" |
3873 | "&& reload_completed" | |
3874 | [(const_int 0)] | |
3875 | { | |
3876 | rtx dest = operands[0]; | |
3877 | rtx src = operands[1]; | |
3878 | rtx element = operands[2]; | |
3879 | rtx vec_tmp = operands[3]; | |
3880 | int value; | |
3881 | ||
427a7384 | 3882 | if (!BYTES_BIG_ENDIAN) |
787c7a65 MM |
3883 | element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); |
3884 | ||
3885 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
3886 | instruction. */ | |
3887 | value = INTVAL (element); | |
3888 | if (value != 1) | |
9bfda664 | 3889 | emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); |
787c7a65 MM |
3890 | else |
3891 | vec_tmp = src; | |
3892 | ||
3893 | if (MEM_P (operands[0])) | |
3894 | { | |
3895 | if (can_create_pseudo_p ()) | |
3af0c6bc | 3896 | dest = rs6000_force_indexed_or_indirect_mem (dest); |
787c7a65 | 3897 | |
9bfda664 | 3898 | if (TARGET_P8_VECTOR) |
787c7a65 MM |
3899 | emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); |
3900 | else | |
3901 | emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp)))); | |
3902 | } | |
3903 | ||
9bfda664 | 3904 | else if (TARGET_P8_VECTOR) |
787c7a65 MM |
3905 | emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); |
3906 | else | |
3907 | emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), | |
3908 | gen_rtx_REG (DImode, REGNO (vec_tmp))); | |
3909 | ||
3910 | DONE; | |
3911 | } | |
863e8d53 | 3912 | [(set_attr "type" "mfvsr,vecperm,fpstore") |
791e7779 SB |
3913 | (set_attr "length" "8") |
3914 | (set_attr "isa" "*,p8v,*")]) | |
787c7a65 | 3915 | |
e2a99194 MM |
3916 | (define_insn_and_split "*vsx_extract_<mode>_p8" |
3917 | [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r") | |
3918 | (vec_select:<VS_scalar> | |
787c7a65 | 3919 | (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v") |
e2a99194 | 3920 | (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) |
787c7a65 | 3921 | (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))] |
456f0dfa | 3922 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT |
9bfda664 | 3923 | && !TARGET_P9_VECTOR" |
e2a99194 MM |
3924 | "#" |
3925 | "&& reload_completed" | |
3926 | [(const_int 0)] | |
3927 | { | |
3928 | rtx dest = operands[0]; | |
3929 | rtx src = operands[1]; | |
3930 | rtx element = operands[2]; | |
3931 | rtx vec_tmp = operands[3]; | |
3932 | int value; | |
3933 | ||
427a7384 | 3934 | if (!BYTES_BIG_ENDIAN) |
e2a99194 MM |
3935 | element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element)); |
3936 | ||
3937 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
3938 | instruction. */ | |
3939 | value = INTVAL (element); | |
3940 | if (<MODE>mode == V16QImode) | |
3941 | { | |
3942 | if (value != 7) | |
3943 | emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element)); | |
3944 | else | |
3945 | vec_tmp = src; | |
3946 | } | |
3947 | else if (<MODE>mode == V8HImode) | |
3948 | { | |
3949 | if (value != 3) | |
3950 | emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element)); | |
3951 | else | |
3952 | vec_tmp = src; | |
3953 | } | |
e2a99194 MM |
3954 | else |
3955 | gcc_unreachable (); | |
3956 | ||
3957 | emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), | |
3958 | gen_rtx_REG (DImode, REGNO (vec_tmp))); | |
3959 | DONE; | |
3960 | } | |
863e8d53 | 3961 | [(set_attr "type" "mfvsr")]) |
e2a99194 MM |
3962 | |
3963 | ;; Optimize extracting a single scalar element from memory. | |
3964 | (define_insn_and_split "*vsx_extract_<mode>_load" | |
3965 | [(set (match_operand:<VS_scalar> 0 "register_operand" "=r") | |
3966 | (vec_select:<VS_scalar> | |
3967 | (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m") | |
3968 | (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) | |
3969 | (clobber (match_scratch:DI 3 "=&b"))] | |
3970 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
3971 | "#" | |
3972 | "&& reload_completed" | |
3973 | [(set (match_dup 0) (match_dup 4))] | |
3974 | { | |
3975 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
3976 | operands[3], <VS_scalar>mode); | |
3977 | } | |
3978 | [(set_attr "type" "load") | |
3979 | (set_attr "length" "8")]) | |
3980 | ||
75c299ac | 3981 | ;; Variable V16QI/V8HI/V4SI extract from a register |
e2a99194 | 3982 | (define_insn_and_split "vsx_extract_<mode>_var" |
75c299ac | 3983 | [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r") |
e2a99194 | 3984 | (unspec:<VS_scalar> |
75c299ac MM |
3985 | [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v") |
3986 | (match_operand:DI 2 "gpc_reg_operand" "r,r")] | |
e2a99194 | 3987 | UNSPEC_VSX_EXTRACT)) |
75c299ac MM |
3988 | (clobber (match_scratch:DI 3 "=r,r")) |
3989 | (clobber (match_scratch:V2DI 4 "=X,&v"))] | |
e2a99194 MM |
3990 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" |
3991 | "#" | |
3992 | "&& reload_completed" | |
3993 | [(const_int 0)] | |
3994 | { | |
3995 | rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], | |
3996 | operands[3], operands[4]); | |
3997 | DONE; | |
0c8ac746 | 3998 | } |
75c299ac MM |
3999 | [(set_attr "isa" "p9v,*")]) |
4000 | ||
4001 | ;; Variable V16QI/V8HI/V4SI extract from memory | |
4002 | (define_insn_and_split "*vsx_extract_<mode>_var_load" | |
4003 | [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r") | |
4004 | (unspec:<VS_scalar> | |
4005 | [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q") | |
4006 | (match_operand:DI 2 "gpc_reg_operand" "r")] | |
4007 | UNSPEC_VSX_EXTRACT)) | |
4008 | (clobber (match_scratch:DI 3 "=&b"))] | |
4009 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
4010 | "#" | |
4011 | "&& reload_completed" | |
4012 | [(set (match_dup 0) (match_dup 4))] | |
4013 | { | |
4014 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
4015 | operands[3], <VS_scalar>mode); | |
4016 | } | |
4017 | [(set_attr "type" "load")]) | |
c5e74d9d | 4018 | |
30d02149 CL |
4019 | ;; ISA 3.1 extract |
4020 | (define_expand "vextractl<mode>" | |
4021 | [(set (match_operand:V2DI 0 "altivec_register_operand") | |
4022 | (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand") | |
4023 | (match_operand:VI2 2 "altivec_register_operand") | |
4024 | (match_operand:SI 3 "register_operand")] | |
4025 | UNSPEC_EXTRACTL))] | |
4026 | "TARGET_POWER10" | |
4027 | { | |
4028 | if (BYTES_BIG_ENDIAN) | |
4029 | { | |
4030 | emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1], | |
4031 | operands[2], operands[3])); | |
4032 | emit_insn (gen_xxswapd_v2di (operands[0], operands[0])); | |
4033 | } | |
4034 | else | |
4035 | emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2], | |
4036 | operands[1], operands[3])); | |
4037 | DONE; | |
4038 | }) | |
4039 | ||
4040 | (define_insn "vextractl<mode>_internal" | |
4041 | [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") | |
4042 | (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v") | |
4043 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4044 | (match_operand:SI 3 "register_operand" "r")] | |
4045 | UNSPEC_EXTRACTL))] | |
4046 | "TARGET_POWER10" | |
4047 | "vext<du_or_d><wd>vlx %0,%1,%2,%3" | |
4048 | [(set_attr "type" "vecsimple")]) | |
4049 | ||
4050 | (define_expand "vextractr<mode>" | |
4051 | [(set (match_operand:V2DI 0 "altivec_register_operand") | |
4052 | (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand") | |
4053 | (match_operand:VI2 2 "altivec_register_operand") | |
4054 | (match_operand:SI 3 "register_operand")] | |
4055 | UNSPEC_EXTRACTR))] | |
4056 | "TARGET_POWER10" | |
4057 | { | |
4058 | if (BYTES_BIG_ENDIAN) | |
4059 | { | |
4060 | emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1], | |
4061 | operands[2], operands[3])); | |
4062 | emit_insn (gen_xxswapd_v2di (operands[0], operands[0])); | |
4063 | } | |
4064 | else | |
4065 | emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2], | |
4066 | operands[1], operands[3])); | |
4067 | DONE; | |
4068 | }) | |
4069 | ||
4070 | (define_insn "vextractr<mode>_internal" | |
4071 | [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") | |
4072 | (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v") | |
4073 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4074 | (match_operand:SI 3 "register_operand" "r")] | |
4075 | UNSPEC_EXTRACTR))] | |
4076 | "TARGET_POWER10" | |
4077 | "vext<du_or_d><wd>vrx %0,%1,%2,%3" | |
4078 | [(set_attr "type" "vecsimple")]) | |
4079 | ||
530e9095 CL |
4080 | (define_expand "vinsertvl_<mode>" |
4081 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4082 | (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand") | |
4083 | (match_operand:VI2 2 "altivec_register_operand") | |
4084 | (match_operand:SI 3 "register_operand" "r")] | |
4085 | UNSPEC_INSERTL))] | |
4086 | "TARGET_POWER10" | |
4087 | { | |
4088 | if (BYTES_BIG_ENDIAN) | |
4089 | emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3], | |
4090 | operands[1], operands[2])); | |
4091 | else | |
4092 | emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3], | |
4093 | operands[1], operands[2])); | |
4094 | DONE; | |
4095 | }) | |
4096 | ||
4097 | (define_insn "vinsertvl_internal_<mode>" | |
4098 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4099 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4100 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4101 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4102 | UNSPEC_INSERTL))] | |
4103 | "TARGET_POWER10" | |
4104 | "vins<wd>vlx %0,%1,%2" | |
4105 | [(set_attr "type" "vecsimple")]) | |
4106 | ||
4107 | (define_expand "vinsertvr_<mode>" | |
4108 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4109 | (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand") | |
4110 | (match_operand:VI2 2 "altivec_register_operand") | |
4111 | (match_operand:SI 3 "register_operand" "r")] | |
4112 | UNSPEC_INSERTR))] | |
4113 | "TARGET_POWER10" | |
4114 | { | |
4115 | if (BYTES_BIG_ENDIAN) | |
4116 | emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3], | |
4117 | operands[1], operands[2])); | |
4118 | else | |
4119 | emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3], | |
4120 | operands[1], operands[2])); | |
4121 | DONE; | |
4122 | }) | |
4123 | ||
4124 | (define_insn "vinsertvr_internal_<mode>" | |
4125 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4126 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4127 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4128 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4129 | UNSPEC_INSERTR))] | |
4130 | "TARGET_POWER10" | |
4131 | "vins<wd>vrx %0,%1,%2" | |
4132 | [(set_attr "type" "vecsimple")]) | |
4133 | ||
4134 | (define_expand "vinsertgl_<mode>" | |
4135 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4136 | (unspec:VI2 [(match_operand:SI 1 "register_operand") | |
4137 | (match_operand:VI2 2 "altivec_register_operand") | |
4138 | (match_operand:SI 3 "register_operand")] | |
4139 | UNSPEC_INSERTL))] | |
4140 | "TARGET_POWER10" | |
4141 | { | |
4142 | if (BYTES_BIG_ENDIAN) | |
4143 | emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3], | |
4144 | operands[1], operands[2])); | |
4145 | else | |
4146 | emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3], | |
4147 | operands[1], operands[2])); | |
4148 | DONE; | |
4149 | }) | |
4150 | ||
4151 | (define_insn "vinsertgl_internal_<mode>" | |
4152 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4153 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4154 | (match_operand:SI 2 "register_operand" "r") | |
4155 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4156 | UNSPEC_INSERTL))] | |
4157 | "TARGET_POWER10" | |
4158 | "vins<wd>lx %0,%1,%2" | |
4159 | [(set_attr "type" "vecsimple")]) | |
4160 | ||
4161 | (define_expand "vinsertgr_<mode>" | |
4162 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4163 | (unspec:VI2 [(match_operand:SI 1 "register_operand") | |
4164 | (match_operand:VI2 2 "altivec_register_operand") | |
4165 | (match_operand:SI 3 "register_operand")] | |
4166 | UNSPEC_INSERTR))] | |
4167 | "TARGET_POWER10" | |
4168 | { | |
4169 | if (BYTES_BIG_ENDIAN) | |
4170 | emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3], | |
4171 | operands[1], operands[2])); | |
4172 | else | |
4173 | emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3], | |
4174 | operands[1], operands[2])); | |
4175 | DONE; | |
4176 | }) | |
4177 | ||
4178 | (define_insn "vinsertgr_internal_<mode>" | |
4179 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4180 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4181 | (match_operand:SI 2 "register_operand" "r") | |
4182 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4183 | UNSPEC_INSERTR))] | |
4184 | "TARGET_POWER10" | |
4185 | "vins<wd>rx %0,%1,%2" | |
4186 | [(set_attr "type" "vecsimple")]) | |
4187 | ||
3f029aea CL |
4188 | (define_expand "vreplace_elt_<mode>" |
4189 | [(set (match_operand:REPLACE_ELT 0 "register_operand") | |
4190 | (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") | |
4191 | (match_operand:<VS_scalar> 2 "register_operand") | |
4192 | (match_operand:QI 3 "const_0_to_3_operand")] | |
4193 | UNSPEC_REPLACE_ELT))] | |
4194 | "TARGET_POWER10" | |
4195 | { | |
4196 | int index; | |
4197 | /* Immediate value is the word index, convert to byte index and adjust for | |
4198 | Endianness if needed. */ | |
4199 | if (BYTES_BIG_ENDIAN) | |
4200 | index = INTVAL (operands[3]) << <REPLACE_ELT_sh>; | |
4201 | ||
4202 | else | |
4203 | index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>); | |
4204 | ||
4205 | emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], | |
4206 | operands[2], | |
4207 | GEN_INT (index))); | |
4208 | DONE; | |
4209 | } | |
4210 | [(set_attr "type" "vecsimple")]) | |
4211 | ||
4212 | (define_expand "vreplace_un_<mode>" | |
4213 | [(set (match_operand:REPLACE_ELT 0 "register_operand") | |
4214 | (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") | |
4215 | (match_operand:<VS_scalar> 2 "register_operand") | |
4216 | (match_operand:QI 3 "const_0_to_12_operand")] | |
4217 | UNSPEC_REPLACE_UN))] | |
4218 | "TARGET_POWER10" | |
4219 | { | |
4220 | /* Immediate value is the byte index Big Endian numbering. */ | |
4221 | emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], | |
4222 | operands[2], operands[3])); | |
4223 | DONE; | |
4224 | } | |
4225 | [(set_attr "type" "vecsimple")]) | |
4226 | ||
4227 | (define_insn "vreplace_elt_<mode>_inst" | |
4228 | [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v") | |
4229 | (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0") | |
4230 | (match_operand:<VS_scalar> 2 "register_operand" "r") | |
4231 | (match_operand:QI 3 "const_0_to_12_operand" "n")] | |
4232 | UNSPEC_REPLACE_ELT))] | |
4233 | "TARGET_POWER10" | |
4234 | "vins<REPLACE_ELT_char> %0,%2,%3" | |
4235 | [(set_attr "type" "vecsimple")]) | |
4236 | ||
156b5cca MM |
4237 | ;; VSX_EXTRACT optimizations |
4238 | ;; Optimize double d = (double) vec_extract (vi, <n>) | |
4239 | ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP | |
4240 | (define_insn_and_split "*vsx_extract_si_<uns>float_df" | |
cc998fd5 | 4241 | [(set (match_operand:DF 0 "gpc_reg_operand" "=wa") |
156b5cca MM |
4242 | (any_float:DF |
4243 | (vec_select:SI | |
4244 | (match_operand:V4SI 1 "gpc_reg_operand" "v") | |
4245 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) | |
4246 | (clobber (match_scratch:V4SI 3 "=v"))] | |
4247 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" | |
4248 | "#" | |
4249 | "&& 1" | |
4250 | [(const_int 0)] | |
4251 | { | |
4252 | rtx dest = operands[0]; | |
4253 | rtx src = operands[1]; | |
4254 | rtx element = operands[2]; | |
4255 | rtx v4si_tmp = operands[3]; | |
4256 | int value; | |
4257 | ||
427a7384 | 4258 | if (!BYTES_BIG_ENDIAN) |
156b5cca MM |
4259 | element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); |
4260 | ||
4261 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
4262 | instruction. */ | |
4263 | value = INTVAL (element); | |
4264 | if (value != 0) | |
4265 | { | |
4266 | if (GET_CODE (v4si_tmp) == SCRATCH) | |
4267 | v4si_tmp = gen_reg_rtx (V4SImode); | |
4268 | emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); | |
4269 | } | |
4270 | else | |
4271 | v4si_tmp = src; | |
4272 | ||
4273 | emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp)); | |
4274 | DONE; | |
4275 | }) | |
4276 | ||
4277 | ;; Optimize <type> f = (<type>) vec_extract (vi, <n>) | |
4278 | ;; where <type> is a floating point type that supported by the hardware that is | |
4279 | ;; not double. First convert the value to double, and then to the desired | |
4280 | ;; type. | |
4281 | (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>" | |
72e3386e | 4282 | [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa") |
156b5cca MM |
4283 | (any_float:VSX_EXTRACT_FL |
4284 | (vec_select:SI | |
4285 | (match_operand:V4SI 1 "gpc_reg_operand" "v") | |
4286 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) | |
4287 | (clobber (match_scratch:V4SI 3 "=v")) | |
cc998fd5 | 4288 | (clobber (match_scratch:DF 4 "=wa"))] |
156b5cca MM |
4289 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" |
4290 | "#" | |
4291 | "&& 1" | |
4292 | [(const_int 0)] | |
4293 | { | |
4294 | rtx dest = operands[0]; | |
4295 | rtx src = operands[1]; | |
4296 | rtx element = operands[2]; | |
4297 | rtx v4si_tmp = operands[3]; | |
4298 | rtx df_tmp = operands[4]; | |
4299 | int value; | |
4300 | ||
427a7384 | 4301 | if (!BYTES_BIG_ENDIAN) |
156b5cca MM |
4302 | element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); |
4303 | ||
4304 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
4305 | instruction. */ | |
4306 | value = INTVAL (element); | |
4307 | if (value != 0) | |
4308 | { | |
4309 | if (GET_CODE (v4si_tmp) == SCRATCH) | |
4310 | v4si_tmp = gen_reg_rtx (V4SImode); | |
4311 | emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); | |
4312 | } | |
4313 | else | |
4314 | v4si_tmp = src; | |
4315 | ||
4316 | if (GET_CODE (df_tmp) == SCRATCH) | |
4317 | df_tmp = gen_reg_rtx (DFmode); | |
4318 | ||
4319 | emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp)); | |
4320 | ||
4321 | if (<MODE>mode == SFmode) | |
4322 | emit_insn (gen_truncdfsf2 (dest, df_tmp)); | |
4323 | else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode)) | |
4324 | emit_insn (gen_extenddftf2_vsx (dest, df_tmp)); | |
4325 | else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode) | |
4326 | && TARGET_FLOAT128_HW) | |
4327 | emit_insn (gen_extenddftf2_hw (dest, df_tmp)); | |
4328 | else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode)) | |
4329 | emit_insn (gen_extenddfif2 (dest, df_tmp)); | |
4330 | else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW) | |
4331 | emit_insn (gen_extenddfkf2_hw (dest, df_tmp)); | |
4332 | else | |
4333 | gcc_unreachable (); | |
4334 | ||
4335 | DONE; | |
4336 | }) | |
4337 | ||
16370e79 MM |
4338 | ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>) |
4339 | ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE | |
4340 | ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char, | |
4341 | ;; vector short or vector unsigned short. | |
4342 | (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>" | |
cb152d12 | 4343 | [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa") |
16370e79 MM |
4344 | (float:FL_CONV |
4345 | (vec_select:<VSX_EXTRACT_I:VS_scalar> | |
4346 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") | |
4347 | (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) | |
4348 | (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] | |
4349 | "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT | |
9bfda664 | 4350 | && TARGET_P9_VECTOR" |
16370e79 MM |
4351 | "#" |
4352 | "&& reload_completed" | |
4353 | [(parallel [(set (match_dup 3) | |
4354 | (vec_select:<VSX_EXTRACT_I:VS_scalar> | |
4355 | (match_dup 1) | |
4356 | (parallel [(match_dup 2)]))) | |
4357 | (clobber (scratch:SI))]) | |
4358 | (set (match_dup 4) | |
4359 | (sign_extend:DI (match_dup 3))) | |
4360 | (set (match_dup 0) | |
4361 | (float:<FL_CONV:MODE> (match_dup 4)))] | |
4362 | { | |
4363 | operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); | |
cb152d12 | 4364 | } |
f6e5e4b8 | 4365 | [(set_attr "isa" "<FL_CONV:VSisa>")]) |
16370e79 MM |
4366 | |
4367 | (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>" | |
cb152d12 | 4368 | [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa") |
16370e79 MM |
4369 | (unsigned_float:FL_CONV |
4370 | (vec_select:<VSX_EXTRACT_I:VS_scalar> | |
4371 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") | |
4372 | (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) | |
4373 | (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] | |
4374 | "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT | |
9bfda664 | 4375 | && TARGET_P9_VECTOR" |
16370e79 MM |
4376 | "#" |
4377 | "&& reload_completed" | |
4378 | [(parallel [(set (match_dup 3) | |
4379 | (vec_select:<VSX_EXTRACT_I:VS_scalar> | |
4380 | (match_dup 1) | |
4381 | (parallel [(match_dup 2)]))) | |
4382 | (clobber (scratch:SI))]) | |
4383 | (set (match_dup 0) | |
4384 | (float:<FL_CONV:MODE> (match_dup 4)))] | |
4385 | { | |
4386 | operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); | |
cb152d12 | 4387 | } |
f6e5e4b8 | 4388 | [(set_attr "isa" "<FL_CONV:VSisa>")]) |
16370e79 | 4389 | |
bcb9a772 MM |
4390 | ;; V4SI/V8HI/V16QI set operation on ISA 3.0 |
4391 | (define_insn "vsx_set_<mode>_p9" | |
4392 | [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>") | |
4393 | (unspec:VSX_EXTRACT_I | |
4394 | [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0") | |
4395 | (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>") | |
4396 | (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")] | |
4397 | UNSPEC_VSX_SET))] | |
9bfda664 | 4398 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64" |
bcb9a772 MM |
4399 | { |
4400 | int ele = INTVAL (operands[3]); | |
4401 | int nunits = GET_MODE_NUNITS (<MODE>mode); | |
4402 | ||
427a7384 | 4403 | if (!BYTES_BIG_ENDIAN) |
bcb9a772 MM |
4404 | ele = nunits - 1 - ele; |
4405 | ||
bc28bbb6 | 4406 | operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele); |
bcb9a772 MM |
4407 | if (<MODE>mode == V4SImode) |
4408 | return "xxinsertw %x0,%x2,%3"; | |
4409 | else | |
4410 | return "vinsert<wd> %0,%2,%3"; | |
4411 | } | |
4412 | [(set_attr "type" "vecperm")]) | |
4413 | ||
16122c22 MM |
4414 | (define_insn_and_split "vsx_set_v4sf_p9" |
4415 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4416 | (unspec:V4SF | |
4417 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
72e3386e | 4418 | (match_operand:SF 2 "gpc_reg_operand" "wa") |
16122c22 MM |
4419 | (match_operand:QI 3 "const_0_to_3_operand" "n")] |
4420 | UNSPEC_VSX_SET)) | |
791e7779 | 4421 | (clobber (match_scratch:SI 4 "=&wa"))] |
9bfda664 | 4422 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" |
16122c22 MM |
4423 | "#" |
4424 | "&& reload_completed" | |
4425 | [(set (match_dup 5) | |
4426 | (unspec:V4SF [(match_dup 2)] | |
4427 | UNSPEC_VSX_CVDPSPN)) | |
4428 | (parallel [(set (match_dup 4) | |
4429 | (vec_select:SI (match_dup 6) | |
4430 | (parallel [(match_dup 7)]))) | |
4431 | (clobber (scratch:SI))]) | |
4432 | (set (match_dup 8) | |
4433 | (unspec:V4SI [(match_dup 8) | |
4434 | (match_dup 4) | |
4435 | (match_dup 3)] | |
4436 | UNSPEC_VSX_SET))] | |
4437 | { | |
4438 | unsigned int tmp_regno = reg_or_subregno (operands[4]); | |
4439 | ||
4440 | operands[5] = gen_rtx_REG (V4SFmode, tmp_regno); | |
4441 | operands[6] = gen_rtx_REG (V4SImode, tmp_regno); | |
f74fc01d | 4442 | operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3); |
16122c22 MM |
4443 | operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); |
4444 | } | |
4445 | [(set_attr "type" "vecperm") | |
0c8ac746 SB |
4446 | (set_attr "length" "12") |
4447 | (set_attr "isa" "p9v")]) | |
16122c22 MM |
4448 | |
4449 | ;; Special case setting 0.0f to a V4SF element | |
4450 | (define_insn_and_split "*vsx_set_v4sf_p9_zero" | |
4451 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4452 | (unspec:V4SF | |
4453 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
4454 | (match_operand:SF 2 "zero_fp_constant" "j") | |
4455 | (match_operand:QI 3 "const_0_to_3_operand" "n")] | |
4456 | UNSPEC_VSX_SET)) | |
791e7779 | 4457 | (clobber (match_scratch:SI 4 "=&wa"))] |
9bfda664 | 4458 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" |
16122c22 MM |
4459 | "#" |
4460 | "&& reload_completed" | |
4461 | [(set (match_dup 4) | |
4462 | (const_int 0)) | |
4463 | (set (match_dup 5) | |
4464 | (unspec:V4SI [(match_dup 5) | |
4465 | (match_dup 4) | |
4466 | (match_dup 3)] | |
4467 | UNSPEC_VSX_SET))] | |
4468 | { | |
4469 | operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); | |
4470 | } | |
4471 | [(set_attr "type" "vecperm") | |
0c8ac746 SB |
4472 | (set_attr "length" "8") |
4473 | (set_attr "isa" "p9v")]) | |
16122c22 MM |
4474 | |
4475 | ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element | |
4476 | ;; that is in the default scalar position (1 for big endian, 2 for little | |
4477 | ;; endian). We just need to do an xxinsertw since the element is in the | |
4478 | ;; correct location. | |
4479 | ||
4480 | (define_insn "*vsx_insert_extract_v4sf_p9" | |
4481 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4482 | (unspec:V4SF | |
4483 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
4484 | (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") | |
4485 | (parallel | |
4486 | [(match_operand:QI 3 "const_0_to_3_operand" "n")])) | |
4487 | (match_operand:QI 4 "const_0_to_3_operand" "n")] | |
4488 | UNSPEC_VSX_SET))] | |
9bfda664 | 4489 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64 |
427a7384 | 4490 | && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))" |
16122c22 MM |
4491 | { |
4492 | int ele = INTVAL (operands[4]); | |
4493 | ||
427a7384 | 4494 | if (!BYTES_BIG_ENDIAN) |
16122c22 MM |
4495 | ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele; |
4496 | ||
4497 | operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele); | |
4498 | return "xxinsertw %x0,%x2,%4"; | |
4499 | } | |
4500 | [(set_attr "type" "vecperm")]) | |
4501 | ||
4502 | ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element | |
4503 | ;; that is in the default scalar position (1 for big endian, 2 for little | |
4504 | ;; endian). Convert the insert/extract to int and avoid doing the conversion. | |
4505 | ||
4506 | (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2" | |
4507 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4508 | (unspec:V4SF | |
4509 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
4510 | (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") | |
4511 | (parallel | |
4512 | [(match_operand:QI 3 "const_0_to_3_operand" "n")])) | |
4513 | (match_operand:QI 4 "const_0_to_3_operand" "n")] | |
4514 | UNSPEC_VSX_SET)) | |
791e7779 | 4515 | (clobber (match_scratch:SI 5 "=&wa"))] |
16122c22 | 4516 | "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode) |
9bfda664 | 4517 | && TARGET_P9_VECTOR && TARGET_POWERPC64 |
427a7384 | 4518 | && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))" |
16122c22 MM |
4519 | "#" |
4520 | "&& 1" | |
4521 | [(parallel [(set (match_dup 5) | |
4522 | (vec_select:SI (match_dup 6) | |
4523 | (parallel [(match_dup 3)]))) | |
4524 | (clobber (scratch:SI))]) | |
4525 | (set (match_dup 7) | |
4526 | (unspec:V4SI [(match_dup 8) | |
4527 | (match_dup 5) | |
4528 | (match_dup 4)] | |
4529 | UNSPEC_VSX_SET))] | |
4530 | { | |
4531 | if (GET_CODE (operands[5]) == SCRATCH) | |
4532 | operands[5] = gen_reg_rtx (SImode); | |
4533 | ||
4534 | operands[6] = gen_lowpart (V4SImode, operands[2]); | |
4535 | operands[7] = gen_lowpart (V4SImode, operands[0]); | |
4536 | operands[8] = gen_lowpart (V4SImode, operands[1]); | |
4537 | } | |
0c8ac746 SB |
4538 | [(set_attr "type" "vecperm") |
4539 | (set_attr "isa" "p9v")]) | |
16122c22 | 4540 | |
5aebfdad RH |
4541 | ;; Expanders for builtins |
4542 | (define_expand "vsx_mergel_<mode>" | |
ad18eed2 SB |
4543 | [(use (match_operand:VSX_D 0 "vsx_register_operand")) |
4544 | (use (match_operand:VSX_D 1 "vsx_register_operand")) | |
4545 | (use (match_operand:VSX_D 2 "vsx_register_operand"))] | |
5aebfdad | 4546 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
68d3bacf | 4547 | { |
427a7384 SB |
4548 | rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); |
4549 | rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); | |
68d3bacf | 4550 | x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); |
f7df4a84 | 4551 | emit_insn (gen_rtx_SET (operands[0], x)); |
c3e96073 | 4552 | DONE; |
68d3bacf | 4553 | }) |
5aebfdad RH |
4554 | |
4555 | (define_expand "vsx_mergeh_<mode>" | |
ad18eed2 SB |
4556 | [(use (match_operand:VSX_D 0 "vsx_register_operand")) |
4557 | (use (match_operand:VSX_D 1 "vsx_register_operand")) | |
4558 | (use (match_operand:VSX_D 2 "vsx_register_operand"))] | |
5aebfdad | 4559 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
68d3bacf | 4560 | { |
427a7384 SB |
4561 | rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); |
4562 | rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); | |
68d3bacf | 4563 | x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); |
f7df4a84 | 4564 | emit_insn (gen_rtx_SET (operands[0], x)); |
c3e96073 | 4565 | DONE; |
68d3bacf | 4566 | }) |
5aebfdad | 4567 | |
29e6733c | 4568 | ;; V2DF/V2DI splat |
d1f6caae MM |
4569 | ;; We separate the register splat insn from the memory splat insn to force the |
4570 | ;; register allocator to generate the indexed form of the SPLAT when it is | |
4571 | ;; given an offsettable memory reference. Otherwise, if the register and | |
4572 | ;; memory insns were combined into a single insn, the register allocator will | |
4573 | ;; load the value into a register, and then do a double word permute. | |
4574 | (define_expand "vsx_splat_<mode>" | |
4575 | [(set (match_operand:VSX_D 0 "vsx_register_operand") | |
29e6733c | 4576 | (vec_duplicate:VSX_D |
d1f6caae MM |
4577 | (match_operand:<VS_scalar> 1 "input_operand")))] |
4578 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
4579 | { | |
4580 | rtx op1 = operands[1]; | |
4581 | if (MEM_P (op1)) | |
3af0c6bc | 4582 | operands[1] = rs6000_force_indexed_or_indirect_mem (op1); |
d1f6caae MM |
4583 | else if (!REG_P (op1)) |
4584 | op1 = force_reg (<VSX_D:VS_scalar>mode, op1); | |
4585 | }) | |
4586 | ||
4587 | (define_insn "vsx_splat_<mode>_reg" | |
012f609e | 4588 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we") |
d1f6caae | 4589 | (vec_duplicate:VSX_D |
11d7bd36 | 4590 | (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))] |
29e6733c MM |
4591 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
4592 | "@ | |
29e6733c | 4593 | xxpermdi %x0,%x1,%x1,0 |
d1f6caae | 4594 | mtvsrdd %x0,%1,%1" |
b0894ae0 | 4595 | [(set_attr "type" "vecperm,vecmove")]) |
d1f6caae | 4596 | |
949c7a09 MM |
4597 | (define_insn "*vsx_splat_v2df_xxspltidp" |
4598 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") | |
4599 | (vec_duplicate:V2DF | |
4600 | (match_operand:DF 1 "xxspltidp_operand" "eF")))] | |
4601 | "TARGET_POWER10" | |
4602 | { | |
4603 | HOST_WIDE_INT value; | |
4604 | ||
4605 | if (!xxspltidp_constant_p (operands[1], DFmode, &value)) | |
4606 | gcc_unreachable (); | |
4607 | ||
4608 | operands[2] = GEN_INT (value); | |
4609 | return "xxspltidp %x0,%1"; | |
4610 | } | |
4611 | [(set_attr "type" "vecperm") | |
4612 | (set_attr "prefixed" "yes")]) | |
4613 | ||
012f609e SB |
4614 | (define_insn "vsx_splat_<mode>_mem" |
4615 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
d1f6caae MM |
4616 | (vec_duplicate:VSX_D |
4617 | (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))] | |
4618 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
4619 | "lxvdsx %x0,%y1" | |
4620 | [(set_attr "type" "vecload")]) | |
29e6733c | 4621 | |
6019c0fc MM |
4622 | ;; V4SI splat support |
4623 | (define_insn "vsx_splat_v4si" | |
4624 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we") | |
50c78b9a | 4625 | (vec_duplicate:V4SI |
58f2fb5c | 4626 | (match_operand:SI 1 "splat_input_operand" "r,Z")))] |
50c78b9a MM |
4627 | "TARGET_P9_VECTOR" |
4628 | "@ | |
4629 | mtvsrws %x0,%1 | |
4630 | lxvwsx %x0,%y1" | |
6019c0fc MM |
4631 | [(set_attr "type" "vecperm,vecload")]) |
4632 | ||
4633 | ;; SImode is not currently allowed in vector registers. This pattern | |
4634 | ;; allows us to use direct move to get the value in a vector register | |
4635 | ;; so that we can use XXSPLTW | |
4636 | (define_insn "vsx_splat_v4si_di" | |
4637 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we") | |
4638 | (vec_duplicate:V4SI | |
4639 | (truncate:SI | |
e670418f | 4640 | (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))] |
6019c0fc MM |
4641 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" |
4642 | "@ | |
4643 | xxspltw %x0,%x1,1 | |
4644 | mtvsrws %x0,%1" | |
818502a3 SB |
4645 | [(set_attr "type" "vecperm") |
4646 | (set_attr "isa" "p8v,*")]) | |
50c78b9a MM |
4647 | |
4648 | ;; V4SF splat (ISA 3.0) | |
6019c0fc | 4649 | (define_insn_and_split "vsx_splat_v4sf" |
50c78b9a MM |
4650 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa") |
4651 | (vec_duplicate:V4SF | |
8509e170 | 4652 | (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))] |
50c78b9a MM |
4653 | "TARGET_P9_VECTOR" |
4654 | "@ | |
4655 | lxvwsx %x0,%y1 | |
4656 | # | |
4657 | mtvsrws %x0,%1" | |
4658 | "&& reload_completed && vsx_register_operand (operands[1], SFmode)" | |
4659 | [(set (match_dup 0) | |
4660 | (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN)) | |
4661 | (set (match_dup 0) | |
70c11966 MM |
4662 | (unspec:V4SF [(match_dup 0) |
4663 | (const_int 0)] UNSPEC_VSX_XXSPLTW))] | |
50c78b9a | 4664 | "" |
b0894ae0 | 4665 | [(set_attr "type" "vecload,vecperm,vecperm") |
911c8df0 | 4666 | (set_attr "length" "*,8,*") |
8509e170 | 4667 | (set_attr "isa" "*,p8v,*")]) |
50c78b9a MM |
4668 | |
4669 | ;; V4SF/V4SI splat from a vector element | |
29e6733c | 4670 | (define_insn "vsx_xxspltw_<mode>" |
7858932e | 4671 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
29e6733c MM |
4672 | (vec_duplicate:VSX_W |
4673 | (vec_select:<VS_scalar> | |
7858932e | 4674 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
29e6733c | 4675 | (parallel |
6019c0fc | 4676 | [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))] |
29e6733c | 4677 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
bf53d4b8 BS |
4678 | { |
4679 | if (!BYTES_BIG_ENDIAN) | |
4680 | operands[2] = GEN_INT (3 - INTVAL (operands[2])); | |
4681 | ||
4682 | return "xxspltw %x0,%x1,%2"; | |
4683 | } | |
4684 | [(set_attr "type" "vecperm")]) | |
4685 | ||
4686 | (define_insn "vsx_xxspltw_<mode>_direct" | |
7858932e SB |
4687 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
4688 | (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa") | |
6019c0fc | 4689 | (match_operand:QI 2 "u5bit_cint_operand" "i")] |
bf53d4b8 BS |
4690 | UNSPEC_VSX_XXSPLTW))] |
4691 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
29e6733c MM |
4692 | "xxspltw %x0,%x1,%2" |
4693 | [(set_attr "type" "vecperm")]) | |
4694 | ||
6019c0fc MM |
4695 | ;; V16QI/V8HI splat support on ISA 2.07 |
4696 | (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di" | |
4697 | [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v") | |
4698 | (vec_duplicate:VSX_SPLAT_I | |
4699 | (truncate:<VS_scalar> | |
4700 | (match_operand:DI 1 "altivec_register_operand" "v"))))] | |
4701 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
4702 | "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>" | |
4703 | [(set_attr "type" "vecperm")]) | |
4704 | ||
2ccdda19 BS |
4705 | ;; V2DF/V2DI splat for use by vec_splat builtin |
4706 | (define_insn "vsx_xxspltd_<mode>" | |
4707 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
4708 | (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa") | |
4709 | (match_operand:QI 2 "u5bit_cint_operand" "i")] | |
4710 | UNSPEC_VSX_XXSPLTD))] | |
4711 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
4712 | { | |
427a7384 SB |
4713 | if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0) |
4714 | || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1)) | |
2ccdda19 BS |
4715 | return "xxpermdi %x0,%x1,%x1,0"; |
4716 | else | |
4717 | return "xxpermdi %x0,%x1,%x1,3"; | |
4718 | } | |
4719 | [(set_attr "type" "vecperm")]) | |
4720 | ||
29e6733c MM |
4721 | ;; V4SF/V4SI interleave |
4722 | (define_insn "vsx_xxmrghw_<mode>" | |
7858932e | 4723 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
5aebfdad RH |
4724 | (vec_select:VSX_W |
4725 | (vec_concat:<VS_double> | |
7858932e SB |
4726 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
4727 | (match_operand:VSX_W 2 "vsx_register_operand" "wa")) | |
5aebfdad RH |
4728 | (parallel [(const_int 0) (const_int 4) |
4729 | (const_int 1) (const_int 5)])))] | |
29e6733c | 4730 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
ed79f4d0 BS |
4731 | { |
4732 | if (BYTES_BIG_ENDIAN) | |
4733 | return "xxmrghw %x0,%x1,%x2"; | |
4734 | else | |
4735 | return "xxmrglw %x0,%x2,%x1"; | |
4736 | } | |
29e6733c MM |
4737 | [(set_attr "type" "vecperm")]) |
4738 | ||
4739 | (define_insn "vsx_xxmrglw_<mode>" | |
7858932e | 4740 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
5aebfdad RH |
4741 | (vec_select:VSX_W |
4742 | (vec_concat:<VS_double> | |
7858932e SB |
4743 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
4744 | (match_operand:VSX_W 2 "vsx_register_operand" "wa")) | |
5aebfdad RH |
4745 | (parallel [(const_int 2) (const_int 6) |
4746 | (const_int 3) (const_int 7)])))] | |
29e6733c | 4747 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
ed79f4d0 BS |
4748 | { |
4749 | if (BYTES_BIG_ENDIAN) | |
4750 | return "xxmrglw %x0,%x1,%x2"; | |
4751 | else | |
4752 | return "xxmrghw %x0,%x2,%x1"; | |
4753 | } | |
29e6733c MM |
4754 | [(set_attr "type" "vecperm")]) |
4755 | ||
4756 | ;; Shift left double by word immediate | |
4757 | (define_insn "vsx_xxsldwi_<mode>" | |
cb152d12 SB |
4758 | [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa") |
4759 | (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa") | |
4760 | (match_operand:VSX_L 2 "vsx_register_operand" "wa") | |
29e6733c MM |
4761 | (match_operand:QI 3 "u5bit_cint_operand" "i")] |
4762 | UNSPEC_VSX_SLDWI))] | |
4763 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
4764 | "xxsldwi %x0,%x1,%x2,%3" | |
cb152d12 SB |
4765 | [(set_attr "type" "vecperm") |
4766 | (set_attr "isa" "<VSisa>")]) | |
df10b6d4 MM |
4767 | |
4768 | \f | |
4769 | ;; Vector reduction insns and splitters | |
4770 | ||
5e8edf67 | 4771 | (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df" |
85949949 | 4772 | [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa") |
df10b6d4 MM |
4773 | (VEC_reduc:V2DF |
4774 | (vec_concat:V2DF | |
4775 | (vec_select:DF | |
85949949 | 4776 | (match_operand:V2DF 1 "vfloat_operand" "wa,wa") |
df10b6d4 MM |
4777 | (parallel [(const_int 1)])) |
4778 | (vec_select:DF | |
4779 | (match_dup 1) | |
4780 | (parallel [(const_int 0)]))) | |
4781 | (match_dup 1))) | |
85949949 | 4782 | (clobber (match_scratch:V2DF 2 "=0,&wa"))] |
df10b6d4 MM |
4783 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
4784 | "#" | |
0ec7641e | 4785 | "&& 1" |
df10b6d4 | 4786 | [(const_int 0)] |
df10b6d4 MM |
4787 | { |
4788 | rtx tmp = (GET_CODE (operands[2]) == SCRATCH) | |
4789 | ? gen_reg_rtx (V2DFmode) | |
4790 | : operands[2]; | |
4791 | emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx)); | |
4792 | emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1])); | |
4793 | DONE; | |
6c332313 | 4794 | } |
df10b6d4 MM |
4795 | [(set_attr "length" "8") |
4796 | (set_attr "type" "veccomplex")]) | |
4797 | ||
5e8edf67 | 4798 | (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf" |
8d3620ba | 4799 | [(set (match_operand:V4SF 0 "vfloat_operand" "=wa") |
df10b6d4 MM |
4800 | (VEC_reduc:V4SF |
4801 | (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) | |
8d3620ba SB |
4802 | (match_operand:V4SF 1 "vfloat_operand" "wa"))) |
4803 | (clobber (match_scratch:V4SF 2 "=&wa")) | |
4804 | (clobber (match_scratch:V4SF 3 "=&wa"))] | |
df10b6d4 MM |
4805 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
4806 | "#" | |
0ec7641e | 4807 | "&& 1" |
df10b6d4 | 4808 | [(const_int 0)] |
df10b6d4 MM |
4809 | { |
4810 | rtx op0 = operands[0]; | |
4811 | rtx op1 = operands[1]; | |
4812 | rtx tmp2, tmp3, tmp4; | |
4813 | ||
4814 | if (can_create_pseudo_p ()) | |
4815 | { | |
4816 | tmp2 = gen_reg_rtx (V4SFmode); | |
4817 | tmp3 = gen_reg_rtx (V4SFmode); | |
4818 | tmp4 = gen_reg_rtx (V4SFmode); | |
4819 | } | |
4820 | else | |
4821 | { | |
4822 | tmp2 = operands[2]; | |
4823 | tmp3 = operands[3]; | |
4824 | tmp4 = tmp2; | |
4825 | } | |
4826 | ||
4827 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); | |
4828 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); | |
4829 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); | |
4830 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3)); | |
4831 | DONE; | |
6c332313 | 4832 | } |
df10b6d4 MM |
4833 | [(set_attr "length" "16") |
4834 | (set_attr "type" "veccomplex")]) | |
4835 | ||
4836 | ;; Combiner patterns with the vector reduction patterns that knows we can get | |
4837 | ;; to the top element of the V2DF array without doing an extract. | |
4838 | ||
4839 | (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar" | |
85949949 | 4840 | [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa") |
df10b6d4 MM |
4841 | (vec_select:DF |
4842 | (VEC_reduc:V2DF | |
4843 | (vec_concat:V2DF | |
4844 | (vec_select:DF | |
85949949 | 4845 | (match_operand:V2DF 1 "vfloat_operand" "wa,wa") |
df10b6d4 MM |
4846 | (parallel [(const_int 1)])) |
4847 | (vec_select:DF | |
4848 | (match_dup 1) | |
4849 | (parallel [(const_int 0)]))) | |
4850 | (match_dup 1)) | |
4851 | (parallel [(const_int 1)]))) | |
85949949 | 4852 | (clobber (match_scratch:DF 2 "=0,&wa"))] |
fbf3df55 | 4853 | "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)" |
df10b6d4 | 4854 | "#" |
0ec7641e | 4855 | "&& 1" |
df10b6d4 | 4856 | [(const_int 0)] |
df10b6d4 MM |
4857 | { |
4858 | rtx hi = gen_highpart (DFmode, operands[1]); | |
4859 | rtx lo = (GET_CODE (operands[2]) == SCRATCH) | |
4860 | ? gen_reg_rtx (DFmode) | |
4861 | : operands[2]; | |
4862 | ||
4863 | emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx)); | |
4864 | emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo)); | |
4865 | DONE; | |
6c332313 | 4866 | } |
df10b6d4 MM |
4867 | [(set_attr "length" "8") |
4868 | (set_attr "type" "veccomplex")]) | |
4869 | ||
4870 | (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar" | |
8d3620ba | 4871 | [(set (match_operand:SF 0 "vfloat_operand" "=f") |
df10b6d4 MM |
4872 | (vec_select:SF |
4873 | (VEC_reduc:V4SF | |
4874 | (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) | |
8d3620ba | 4875 | (match_operand:V4SF 1 "vfloat_operand" "wa")) |
df10b6d4 | 4876 | (parallel [(const_int 3)]))) |
8d3620ba SB |
4877 | (clobber (match_scratch:V4SF 2 "=&wa")) |
4878 | (clobber (match_scratch:V4SF 3 "=&wa")) | |
4879 | (clobber (match_scratch:V4SF 4 "=0"))] | |
03bb10aa | 4880 | "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)" |
df10b6d4 | 4881 | "#" |
0ec7641e | 4882 | "&& 1" |
df10b6d4 | 4883 | [(const_int 0)] |
df10b6d4 MM |
4884 | { |
4885 | rtx op0 = operands[0]; | |
4886 | rtx op1 = operands[1]; | |
4887 | rtx tmp2, tmp3, tmp4, tmp5; | |
4888 | ||
4889 | if (can_create_pseudo_p ()) | |
4890 | { | |
4891 | tmp2 = gen_reg_rtx (V4SFmode); | |
4892 | tmp3 = gen_reg_rtx (V4SFmode); | |
4893 | tmp4 = gen_reg_rtx (V4SFmode); | |
4894 | tmp5 = gen_reg_rtx (V4SFmode); | |
4895 | } | |
4896 | else | |
4897 | { | |
4898 | tmp2 = operands[2]; | |
4899 | tmp3 = operands[3]; | |
4900 | tmp4 = tmp2; | |
4901 | tmp5 = operands[4]; | |
4902 | } | |
4903 | ||
4904 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); | |
4905 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); | |
4906 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); | |
4907 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3)); | |
4908 | emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5)); | |
4909 | DONE; | |
6c332313 | 4910 | } |
df10b6d4 MM |
4911 | [(set_attr "length" "20") |
4912 | (set_attr "type" "veccomplex")]) | |
d86e633a MM |
4913 | |
4914 | \f | |
4915 | ;; Power8 Vector fusion. The fused ops must be physically adjacent. | |
4916 | (define_peephole | |
ad18eed2 SB |
4917 | [(set (match_operand:P 0 "base_reg_operand") |
4918 | (match_operand:P 1 "short_cint_operand")) | |
4919 | (set (match_operand:VSX_M 2 "vsx_register_operand") | |
50c78b9a | 4920 | (mem:VSX_M (plus:P (match_dup 0) |
ad18eed2 | 4921 | (match_operand:P 3 "int_reg_operand"))))] |
5d57fdc1 | 4922 | "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" |
0bcd172e | 4923 | "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" |
d86e633a MM |
4924 | [(set_attr "length" "8") |
4925 | (set_attr "type" "vecload")]) | |
4926 | ||
4927 | (define_peephole | |
ad18eed2 SB |
4928 | [(set (match_operand:P 0 "base_reg_operand") |
4929 | (match_operand:P 1 "short_cint_operand")) | |
4930 | (set (match_operand:VSX_M 2 "vsx_register_operand") | |
4931 | (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand") | |
50c78b9a | 4932 | (match_dup 0))))] |
5d57fdc1 | 4933 | "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" |
0bcd172e | 4934 | "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" |
d86e633a MM |
4935 | [(set_attr "length" "8") |
4936 | (set_attr "type" "vecload")]) | |
50c78b9a MM |
4937 | |
4938 | \f | |
db042e16 CL |
4939 | ;; ISA 3.1 vector extend sign support |
4940 | (define_insn "vsx_sign_extend_v2di_v1ti" | |
4941 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
4942 | (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")] | |
4943 | UNSPEC_VSX_SIGN_EXTEND))] | |
4944 | "TARGET_POWER10" | |
4945 | "vextsd2q %0,%1" | |
4946 | [(set_attr "type" "vecexts")]) | |
4947 | ||
4948 | (define_expand "vsignextend_v2di_v1ti" | |
4949 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
4950 | (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")] | |
4951 | UNSPEC_VSX_SIGN_EXTEND))] | |
4952 | "TARGET_POWER10" | |
4953 | { | |
4954 | if (BYTES_BIG_ENDIAN) | |
4955 | { | |
4956 | rtx tmp = gen_reg_rtx (V2DImode); | |
4957 | ||
4958 | emit_insn (gen_altivec_vrevev2di2(tmp, operands[1])); | |
4959 | emit_insn (gen_vsx_sign_extend_v2di_v1ti(operands[0], tmp)); | |
4960 | DONE; | |
4961 | } | |
4962 | ||
4963 | emit_insn (gen_vsx_sign_extend_v2di_v1ti(operands[0], operands[1])); | |
4964 | }) | |
4965 | ||
50c78b9a MM |
4966 | ;; ISA 3.0 vector extend sign support |
4967 | ||
4968 | (define_insn "vsx_sign_extend_qi_<mode>" | |
4969 | [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") | |
4970 | (unspec:VSINT_84 | |
4971 | [(match_operand:V16QI 1 "vsx_register_operand" "v")] | |
4972 | UNSPEC_VSX_SIGN_EXTEND))] | |
4973 | "TARGET_P9_VECTOR" | |
4974 | "vextsb2<wd> %0,%1" | |
7c788ce2 | 4975 | [(set_attr "type" "vecexts")]) |
50c78b9a | 4976 | |
db042e16 CL |
4977 | (define_expand "vsignextend_qi_<mode>" |
4978 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
4979 | (unspec:VIlong | |
4980 | [(match_operand:V16QI 1 "vsx_register_operand" "v")] | |
4981 | UNSPEC_VSX_SIGN_EXTEND))] | |
4982 | "TARGET_P9_VECTOR" | |
4983 | { | |
4984 | if (BYTES_BIG_ENDIAN) | |
4985 | { | |
4986 | rtx tmp = gen_reg_rtx (V16QImode); | |
4987 | emit_insn (gen_altivec_vrevev16qi2(tmp, operands[1])); | |
4988 | emit_insn (gen_vsx_sign_extend_qi_<mode>(operands[0], tmp)); | |
4989 | } | |
4990 | else | |
4991 | emit_insn (gen_vsx_sign_extend_qi_<mode>(operands[0], operands[1])); | |
4992 | DONE; | |
4993 | }) | |
4994 | ||
ac11b8c0 | 4995 | (define_insn "vsx_sign_extend_hi_<mode>" |
50c78b9a MM |
4996 | [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") |
4997 | (unspec:VSINT_84 | |
4998 | [(match_operand:V8HI 1 "vsx_register_operand" "v")] | |
4999 | UNSPEC_VSX_SIGN_EXTEND))] | |
5000 | "TARGET_P9_VECTOR" | |
5001 | "vextsh2<wd> %0,%1" | |
7c788ce2 | 5002 | [(set_attr "type" "vecexts")]) |
50c78b9a | 5003 | |
db042e16 CL |
5004 | (define_expand "vsignextend_hi_<mode>" |
5005 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
5006 | (unspec:VIlong | |
5007 | [(match_operand:V8HI 1 "vsx_register_operand" "v")] | |
5008 | UNSPEC_VSX_SIGN_EXTEND))] | |
5009 | "TARGET_P9_VECTOR" | |
5010 | { | |
5011 | if (BYTES_BIG_ENDIAN) | |
5012 | { | |
5013 | rtx tmp = gen_reg_rtx (V8HImode); | |
5014 | emit_insn (gen_altivec_vrevev8hi2(tmp, operands[1])); | |
5015 | emit_insn (gen_vsx_sign_extend_hi_<mode>(operands[0], tmp)); | |
5016 | } | |
5017 | else | |
5018 | emit_insn (gen_vsx_sign_extend_hi_<mode>(operands[0], operands[1])); | |
5019 | DONE; | |
5020 | }) | |
5021 | ||
5022 | (define_insn "vsx_sign_extend_si_v2di" | |
50c78b9a MM |
5023 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") |
5024 | (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] | |
5025 | UNSPEC_VSX_SIGN_EXTEND))] | |
5026 | "TARGET_P9_VECTOR" | |
5027 | "vextsw2d %0,%1" | |
7c788ce2 | 5028 | [(set_attr "type" "vecexts")]) |
ac11b8c0 | 5029 | |
db042e16 CL |
5030 | (define_expand "vsignextend_si_v2di" |
5031 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") | |
5032 | (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] | |
5033 | UNSPEC_VSX_SIGN_EXTEND))] | |
5034 | "TARGET_P9_VECTOR" | |
5035 | { | |
5036 | if (BYTES_BIG_ENDIAN) | |
5037 | { | |
5038 | rtx tmp = gen_reg_rtx (V4SImode); | |
5039 | ||
5040 | emit_insn (gen_altivec_vrevev4si2(tmp, operands[1])); | |
5041 | emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], tmp)); | |
5042 | } | |
5043 | else | |
5044 | emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], operands[1])); | |
5045 | DONE; | |
5046 | }) | |
5047 | ||
87325119 WS |
5048 | ;; ISA 3.1 vector sign extend |
5049 | ;; Move DI value from GPR to TI mode in VSX register, word 1. | |
5050 | (define_insn "mtvsrdd_diti_w1" | |
5051 | [(set (match_operand:TI 0 "register_operand" "=wa") | |
5052 | (unspec:TI [(match_operand:DI 1 "register_operand" "r")] | |
5053 | UNSPEC_MTVSRD_DITI_W1))] | |
5054 | "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" | |
5055 | "mtvsrdd %x0,0,%1" | |
5056 | [(set_attr "type" "vecmove")]) | |
5057 | ||
5058 | ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg | |
5059 | (define_insn "extendditi2_vector" | |
5060 | [(set (match_operand:TI 0 "gpc_reg_operand" "=v") | |
5061 | (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")] | |
5062 | UNSPEC_EXTENDDITI2))] | |
5063 | "TARGET_POWER10" | |
5064 | "vextsd2q %0,%1" | |
5065 | [(set_attr "type" "vecexts")]) | |
5066 | ||
5067 | (define_expand "extendditi2" | |
5068 | [(set (match_operand:TI 0 "gpc_reg_operand") | |
5069 | (sign_extend:DI (match_operand:DI 1 "gpc_reg_operand")))] | |
5070 | "TARGET_POWER10" | |
5071 | { | |
5072 | /* Move 64-bit src from GPR to vector reg and sign extend to 128-bits. */ | |
5073 | rtx temp = gen_reg_rtx (TImode); | |
5074 | emit_insn (gen_mtvsrdd_diti_w1 (temp, operands[1])); | |
5075 | emit_insn (gen_extendditi2_vector (operands[0], temp)); | |
5076 | DONE; | |
5077 | }) | |
5078 | ||
ac11b8c0 | 5079 | \f |
e9e6d4f6 KN |
5080 | ;; ISA 3.0 Binary Floating-Point Support |
5081 | ||
b70bb05b | 5082 | ;; VSX Scalar Extract Exponent Quad-Precision |
cdb4b7aa | 5083 | (define_insn "xsxexpqp_<mode>" |
b70bb05b | 5084 | [(set (match_operand:DI 0 "altivec_register_operand" "=v") |
cdb4b7aa | 5085 | (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] |
b70bb05b KN |
5086 | UNSPEC_VSX_SXEXPDP))] |
5087 | "TARGET_P9_VECTOR" | |
5088 | "xsxexpqp %0,%1" | |
5089 | [(set_attr "type" "vecmove")]) | |
5090 | ||
e9e6d4f6 KN |
5091 | ;; VSX Scalar Extract Exponent Double-Precision |
5092 | (define_insn "xsxexpdp" | |
5093 | [(set (match_operand:DI 0 "register_operand" "=r") | |
5094 | (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] | |
5095 | UNSPEC_VSX_SXEXPDP))] | |
5096 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
5097 | "xsxexpdp %0,%x1" | |
5098 | [(set_attr "type" "integer")]) | |
5099 | ||
b70bb05b | 5100 | ;; VSX Scalar Extract Significand Quad-Precision |
cdb4b7aa | 5101 | (define_insn "xsxsigqp_<mode>" |
b70bb05b | 5102 | [(set (match_operand:TI 0 "altivec_register_operand" "=v") |
cdb4b7aa | 5103 | (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] |
b70bb05b KN |
5104 | UNSPEC_VSX_SXSIG))] |
5105 | "TARGET_P9_VECTOR" | |
5106 | "xsxsigqp %0,%1" | |
5107 | [(set_attr "type" "vecmove")]) | |
5108 | ||
e9e6d4f6 KN |
5109 | ;; VSX Scalar Extract Significand Double-Precision |
5110 | (define_insn "xsxsigdp" | |
5111 | [(set (match_operand:DI 0 "register_operand" "=r") | |
5112 | (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] | |
b70bb05b | 5113 | UNSPEC_VSX_SXSIG))] |
e9e6d4f6 KN |
5114 | "TARGET_P9_VECTOR && TARGET_64BIT" |
5115 | "xsxsigdp %0,%x1" | |
5116 | [(set_attr "type" "integer")]) | |
5117 | ||
b70bb05b | 5118 | ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument |
cdb4b7aa MM |
5119 | (define_insn "xsiexpqpf_<mode>" |
5120 | [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") | |
5121 | (unspec:IEEE128 | |
5122 | [(match_operand:IEEE128 1 "altivec_register_operand" "v") | |
5123 | (match_operand:DI 2 "altivec_register_operand" "v")] | |
b70bb05b KN |
5124 | UNSPEC_VSX_SIEXPQP))] |
5125 | "TARGET_P9_VECTOR" | |
5126 | "xsiexpqp %0,%1,%2" | |
5127 | [(set_attr "type" "vecmove")]) | |
5128 | ||
5129 | ;; VSX Scalar Insert Exponent Quad-Precision | |
cdb4b7aa MM |
5130 | (define_insn "xsiexpqp_<mode>" |
5131 | [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") | |
5132 | (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v") | |
5133 | (match_operand:DI 2 "altivec_register_operand" "v")] | |
b70bb05b KN |
5134 | UNSPEC_VSX_SIEXPQP))] |
5135 | "TARGET_P9_VECTOR" | |
5136 | "xsiexpqp %0,%1,%2" | |
5137 | [(set_attr "type" "vecmove")]) | |
5138 | ||
;; VSX Scalar Insert Exponent Double-Precision.  Both the significand
;; bits (operand 1) and the biased exponent (operand 2) come from GPRs,
;; hence the TARGET_64BIT requirement.
(define_insn "xsiexpdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument.
;; NOTE(review): operand 1 is DFmode yet constrained to "r" (a GPR),
;; i.e. the DF bit image is expected in an integer register — presumably
;; to match the builtin's argument passing; confirm against the caller.
(define_insn "xsiexpdpf"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])
5158 | ||
;; VSX Scalar Compare Exponents Double-Precision.  Expand one of the
;; eq/lt/gt/unordered predicate tests (CMP_TEST iterator) into a CCFP
;; compare of the exponents followed by extraction of the tested CR bit
;; into a GPR.
(define_expand "xscmpexpdp_<code>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:DF
	  [(match_operand:DF 1 "vsx_register_operand" "wa")
	   (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  /* Without NaNs the unordered bit can never be set, so the result is
     known to be zero without emitting the compare at all.  */
  if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
    {
      emit_move_insn (operands[0], const0_rtx);
      DONE;
    }

  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpdp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
		     (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpdp %0,%x1,%x2"
  [(set_attr "type" "fpcompare")])
5192 | ||
;; VSX Scalar Compare Exponents Quad-Precision.  Same scheme as the
;; double-precision expander above, for the IEEE128 modes.
(define_expand "xscmpexpqp_<code>_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "vsx_register_operand" "v")
	   (match_operand:IEEE128 2 "vsx_register_operand" "v")]
	  UNSPEC_VSX_SCMPEXPQP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  /* Without NaNs, UNORDERED is statically false.  */
  if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
    {
      emit_move_insn (operands[0], const0_rtx);
      DONE;
    }

  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpqp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
			  (match_operand:IEEE128 2 "altivec_register_operand" "v")]
	  UNSPEC_VSX_SCMPEXPQP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])
5226 | ||
;; VSX Scalar Test Data Class Quad-Precision
;; (Expansion for scalar_test_data_class (__ieee128, int))
;; (Has side effect of setting the lt bit if operand 1 is negative,
;; setting the eq bit if any of the conditions tested by operand 2
;; are satisfied, and clearing the gt and unordered bits to zero.)
(define_expand "xststdcqp_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   ;; The builtin result is the eq bit of the test.
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})
5247 | ||
;; VSX Scalar Test Data Class Double- and Single-Precision
;; (The lt bit is set if operand 1 is negative.  The eq bit is set
;; if any of the conditions tested by operand 2 are satisfied.
;; The gt and unordered bits are cleared to zero.)
(define_expand "xststdc<sd>p"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 4)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
  ;; Zero SImode constant matched by the *xststdc<sd>p insn below.
  operands[4] = CONST0_RTX (SImode);
})
5268 | ||
;; The VSX Scalar Test Negative Quad-Precision.  A data-class test with
;; an empty condition mask (0); only the lt (sign) bit of the result is
;; extracted.
(define_expand "xststdcnegqp_<mode>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})
5285 | ||
;; The VSX Scalar Test Negative Double- and Single-Precision.  As for the
;; quad-precision variant above: data-class test with mask 0, extracting
;; only the lt (sign) bit.
(define_expand "xststdcneg<sd>p"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 3)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
  operands[3] = CONST0_RTX (SImode);
})
5303 | ||
;; Matcher for the xststdcqp data-class test emitted by the expanders
;; above.
;; NOTE(review): operand 0 uses an empty predicate "" where the sibling
;; *xscmpexpdp pattern uses "cc_reg_operand" — presumably intentional,
;; but worth confirming.
(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))]
  "TARGET_P9_VECTOR"
  "xststdcqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

;; Matcher for the double-/single-precision data-class test.
(define_insn "*xststdc<sd>p"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xststdc<sd>p %0,%x1,%2"
  [(set_attr "type" "fpcompare")])
5326 | ||
;; VSX Vector Extract Exponent Double and Single Precision
(define_insn "xvxexp<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXEXP))]
  "TARGET_P9_VECTOR"
  "xvxexp<sd>p %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Extract Significand Double and Single Precision
(define_insn "xvxsig<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXSIG))]
  "TARGET_P9_VECTOR"
  "xvxsig<sd>p %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Insert Exponent Double and Single Precision.
;; Operand 1 supplies the significands, operand 2 the exponents.
(define_insn "xviexp<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VIEXP))]
  "TARGET_P9_VECTOR"
  "xviexp<sd>p %x0,%x1,%x2"
  [(set_attr "type" "vecsimple")])
5357 | ||
;; VSX Vector Test Data Class Double and Single Precision
;; The corresponding elements of the result vector are all ones
;; if any of the conditions tested by operand 2 are satisfied.
;; The result mode is the matching integer vector mode (<VSI>).
(define_insn "xvtstdc<sd>p"
  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
	(unspec:<VSI>
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
	 UNSPEC_VSX_VTSTDC))]
  "TARGET_P9_VECTOR"
  "xvtstdc<sd>p %x0,%x1,%2"
  [(set_attr "type" "vecsimple")])
902cb7b1 KN |
5370 | |
5371 | ;; ISA 3.0 String Operations Support | |
5372 | ||
5373 | ;; Compare vectors producing a vector result and a predicate, setting CR6 | |
5374 | ;; to indicate a combined status. This pattern matches v16qi, v8hi, and | |
5375 | ;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no | |
50181506 KN |
5376 | ;; need to match v4sf, v2df, or v2di modes because those are expanded |
5377 | ;; to use Power8 instructions. | |
902cb7b1 KN |
5378 | (define_insn "*vsx_ne_<mode>_p" |
5379 | [(set (reg:CC CR6_REGNO) | |
5380 | (unspec:CC | |
5381 | [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") | |
5382 | (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))] | |
5383 | UNSPEC_PREDICATE)) | |
5384 | (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v") | |
5385 | (ne:VSX_EXTRACT_I (match_dup 1) | |
5386 | (match_dup 2)))] | |
5387 | "TARGET_P9_VECTOR" | |
50181506 | 5388 | "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2" |
902cb7b1 KN |
5389 | [(set_attr "type" "vecsimple")]) |
5390 | ||
5391 | (define_insn "*vector_nez_<mode>_p" | |
5392 | [(set (reg:CC CR6_REGNO) | |
5393 | (unspec:CC [(unspec:VI | |
5394 | [(match_operand:VI 1 "gpc_reg_operand" "v") | |
5395 | (match_operand:VI 2 "gpc_reg_operand" "v")] | |
5396 | UNSPEC_NEZ_P)] | |
5397 | UNSPEC_PREDICATE)) | |
5398 | (set (match_operand:VI 0 "gpc_reg_operand" "=v") | |
5399 | (unspec:VI [(match_dup 1) | |
5400 | (match_dup 2)] | |
5401 | UNSPEC_NEZ_P))] | |
5402 | "TARGET_P9_VECTOR" | |
5403 | "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2" | |
5404 | [(set_attr "type" "vecsimple")]) | |
5405 | ||
;; Return first position of match between vectors using natural order
;; for both LE and BE execution modes.
(define_expand "first_match_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
    UNSPEC_VSX_FIRST_MATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;

  rtx cmp_result = gen_reg_rtx (<MODE>mode);
  rtx not_result = gen_reg_rtx (<MODE>mode);

  /* not_result gets all-ones in the elements where the inputs match.  */
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					    operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));

  /* v{ct,cl}zlsbb return a byte index; shift right by sh to convert to
     an element index (size/2 equals log2(size) for 2- and 4-byte
     elements).  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      /* Byte elements: the byte index is already the element index.  */
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
5444 | ||
;; Return first position of match between vectors or end of string (EOS) using
;; natural element order for both LE and BE execution modes.
(define_expand "first_match_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
		(match_operand:VSX_EXTRACT_I 2 "register_operand")]
    UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that do not match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements that match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
  /* Byte index -> element index shift; see first_match_index.  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
5495 | ||
;; Return first position of mismatch between vectors using natural
;; element order for both LE and BE execution modes.
(define_expand "first_mismatch_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
		(match_operand:VSX_EXTRACT_I 2 "register_operand")]
    UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);

  /* cmp_result gets all-ones in elements that differ.  */
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					    operands[2]));
  /* Byte index -> element index shift; see first_match_index.  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
5530 | ||
;; Return first position of mismatch between vectors or end of string (EOS)
;; using natural element order for both LE and BE execution modes.
(define_expand "first_mismatch_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
		(match_operand:VSX_EXTRACT_I 2 "register_operand")]
    UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements did not match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
  /* Byte index -> element index shift; see first_match_index.  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
5584 | ||
;; Load VSX Vector with Length.  The hardware expects the byte count in
;; bits 0:7 of the length register, hence the shift left by 56.
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

;; The (mem ...) operand makes the memory access visible to alias
;; analysis even though the access is described by an unspec.
(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])
902cb7b1 | 5611 | |
;; Load VSX Vector Left-justified with Length (length already shifted
;; into bits 0:7 by the caller).
(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])
5621 | ||
;; Expand for builtin xl_len_r: lxvll loads left-justified, so permute
;; the loaded bytes with an lvsl-generated mask to right-justify them.
;; NOTE(review): the insn condition is empty (""), i.e. always enabled,
;; although gen_lxvll requires TARGET_P9_VECTOR — presumably the builtin
;; machinery guards the expansion; confirm.
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2]));
  /* Length goes in bits 0:7 for lxvll.  */
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
					  shift_mask));
  DONE;
})
5640 | ||
;; Store VSX Vector Left-justified with Length (length already shifted
;; into bits 0:7 by the caller).
(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])
5650 | ||
;; Store VSX Vector with Length.  As for lxvl, the byte count must be
;; shifted into bits 0:7 of the length register.
(define_expand "stxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})
5666 | ||
;; Define optab for vector access with length vectorization exploitation.
;; The vectorizer passes the length as a QImode value; widen it to DImode
;; for lxvl/stxvl.
(define_expand "len_load_v16qi"
  [(match_operand:V16QI 0 "vlogical_operand")
   (match_operand:V16QI 1 "memory_operand")
   (match_operand:QI 2 "gpc_reg_operand")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx mem = XEXP (operands[1], 0);
  mem = force_reg (DImode, mem);
  rtx len = gen_lowpart (DImode, operands[2]);
  emit_insn (gen_lxvl (operands[0], mem, len));
  DONE;
})

(define_expand "len_store_v16qi"
  [(match_operand:V16QI 0 "memory_operand")
   (match_operand:V16QI 1 "vlogical_operand")
   (match_operand:QI 2 "gpc_reg_operand")
  ]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx mem = XEXP (operands[0], 0);
  mem = force_reg (DImode, mem);
  rtx len = gen_lowpart (DImode, operands[2]);
  emit_insn (gen_stxvl (operands[1], mem, len));
  DONE;
})
5694 | ||
;; Matcher for the stxvl expander above; the (mem ...) operand exposes
;; the store to alias analysis.
(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])
902cb7b1 | 5705 | |
;; Expand for builtin xst_len_r: right-justified data must be permuted
;; left (lvsr mask) before the left-justified stxvll store.
;;
;; The insn condition was previously the string "UNSPEC_XST_LEN_R" — a
;; copy-paste of the unspec name.  As a C condition that is a nonzero
;; enumerator, i.e. always true, which made the pattern available even
;; without Power9 although the gen_stxvll and gen_altivec_lvsr_reg_di
;; insns it emits require TARGET_P9_VECTOR.  Guard it properly.
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
   (match_operand:DI 1 "register_operand" "b")
   (match_operand:DI 2 "register_operand" "r")]
  "TARGET_P9_VECTOR"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  /* Length goes in bits 0:7 for stxvll.  */
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})
5724 | ||
;; Vector Compare Not Equal Byte (specified/not+eq:)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(not:V16QI
	 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
		   (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal v1ti (specified/not+eq:).  No single Power10
;; instruction exists, so synthesize it as vcmpequq followed by a
;; complement.
(define_expand "vcmpnet"
  [(set (match_operand:V1TI 0 "altivec_register_operand")
	(not:V1TI
	 (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand")
		  (match_operand:V1TI 2 "altivec_register_operand"))))]
  "TARGET_POWER10"
{
  emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2]));
  emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0]));
  DONE;
})
5747 | ||
;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte predicate or record-form
;; (sets CR6 in addition to the element mask).
(define_insn "vcmpnezb_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))
   (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_dup 1)
	  (match_dup 2)]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb. %0,%1,%2"
  [(set_attr "type" "vecsimple")])
5774 | ||
;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(not:V8HI
	 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
		  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified/not+eq:)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(not:V4SI
	 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
		  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])
5814 | ||
;; Vector Count Leading Zero Least-Significant Bits Byte.  Returns the
;; byte index of the first set mask byte counting from the left.
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte.  Returns the
;; byte index of the first set mask byte counting from the right.
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])
5834 | ||
;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])
16370e79 MM |
5900 | |
5901 | ;; Vector insert/extract word at arbitrary byte values. Note, the little | |
5902 | ;; endian version needs to adjust the byte number, and the V4SI element in | |
5903 | ;; vinsert4b. | |
b8bf5603 CL |
5904 | (define_insn "extract4b" |
5905 | [(set (match_operand:V2DI 0 "vsx_register_operand") | |
5906 | (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa") | |
5907 | (match_operand:QI 2 "const_0_to_12_operand" "n")] | |
5908 | UNSPEC_XXEXTRACTUW))] | |
5909 | "TARGET_P9_VECTOR" | |
5910 | { | |
427a7384 | 5911 | if (!BYTES_BIG_ENDIAN) |
b8bf5603 CL |
5912 | operands[2] = GEN_INT (12 - INTVAL (operands[2])); |
5913 | ||
5914 | return "xxextractuw %x0,%x1,%2"; | |
5915 | }) | |
5916 | ||
5917 | (define_expand "insert4b" | |
5918 | [(set (match_operand:V16QI 0 "vsx_register_operand") | |
5919 | (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand") | |
5920 | (match_operand:V16QI 2 "vsx_register_operand") | |
5921 | (match_operand:QI 3 "const_0_to_12_operand")] | |
5922 | UNSPEC_XXINSERTW))] | |
5923 | "TARGET_P9_VECTOR" | |
5924 | { | |
427a7384 | 5925 | if (!BYTES_BIG_ENDIAN) |
b8bf5603 CL |
5926 | { |
5927 | rtx op1 = operands[1]; | |
5928 | rtx v4si_tmp = gen_reg_rtx (V4SImode); | |
5929 | emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx)); | |
5930 | operands[1] = v4si_tmp; | |
5931 | operands[3] = GEN_INT (12 - INTVAL (operands[3])); | |
5932 | } | |
5933 | }) | |
5934 | ||
5935 | (define_insn "*insert4b_internal" | |
5936 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
5937 | (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa") | |
5938 | (match_operand:V16QI 2 "vsx_register_operand" "0") | |
5939 | (match_operand:QI 3 "const_0_to_12_operand" "n")] | |
5940 | UNSPEC_XXINSERTW))] | |
5941 | "TARGET_P9_VECTOR" | |
5942 | "xxinsertw %x0,%x1,%3" | |
5943 | [(set_attr "type" "vecperm")]) | |
5944 | ||
fba4b861 | 5945 | |
26bca0ed CL |
5946 | ;; Generate vector extract four float 32 values from left four elements |
5947 | ;; of eight element vector of float 16 values. | |
5948 | (define_expand "vextract_fp_from_shorth" | |
5949 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
5950 | (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] | |
5951 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))] | |
5952 | "TARGET_P9_VECTOR" | |
5953 | { | |
26bca0ed | 5954 | int i; |
6ad1bf18 | 5955 | int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0}; |
8d31eb8f | 5956 | int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7}; |
26bca0ed CL |
5957 | |
5958 | rtx rvals[16]; | |
5959 | rtx mask = gen_reg_rtx (V16QImode); | |
5960 | rtx tmp = gen_reg_rtx (V16QImode); | |
5961 | rtvec v; | |
5962 | ||
5963 | for (i = 0; i < 16; i++) | |
6ad1bf18 CL |
5964 | if (!BYTES_BIG_ENDIAN) |
5965 | rvals[i] = GEN_INT (vals_le[i]); | |
5966 | else | |
5967 | rvals[i] = GEN_INT (vals_be[i]); | |
26bca0ed CL |
5968 | |
5969 | /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 | |
5970 | inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move | |
6ad1bf18 CL |
5971 | src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the |
5972 | conversion instruction. */ | |
26bca0ed | 5973 | v = gen_rtvec_v (16, rvals); |
8e1863ec | 5974 | emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); |
26bca0ed CL |
5975 | emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], |
5976 | operands[1], mask)); | |
5977 | emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); | |
5978 | DONE; | |
5979 | }) | |
5980 | ||
5981 | ;; Generate vector extract four float 32 values from right four elements | |
5982 | ;; of eight element vector of float 16 values. | |
5983 | (define_expand "vextract_fp_from_shortl" | |
5984 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
5985 | (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] | |
5986 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))] | |
5987 | "TARGET_P9_VECTOR" | |
5988 | { | |
6ad1bf18 | 5989 | int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0}; |
8d31eb8f | 5990 | int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15}; |
6ad1bf18 | 5991 | |
26bca0ed CL |
5992 | int i; |
5993 | rtx rvals[16]; | |
5994 | rtx mask = gen_reg_rtx (V16QImode); | |
5995 | rtx tmp = gen_reg_rtx (V16QImode); | |
5996 | rtvec v; | |
5997 | ||
5998 | for (i = 0; i < 16; i++) | |
6ad1bf18 CL |
5999 | if (!BYTES_BIG_ENDIAN) |
6000 | rvals[i] = GEN_INT (vals_le[i]); | |
6001 | else | |
6002 | rvals[i] = GEN_INT (vals_be[i]); | |
26bca0ed CL |
6003 | |
6004 | /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 | |
6005 | inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move | |
6ad1bf18 CL |
6006 | src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the |
6007 | conversion instruction. */ | |
26bca0ed | 6008 | v = gen_rtvec_v (16, rvals); |
8e1863ec | 6009 | emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); |
26bca0ed CL |
6010 | emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], |
6011 | operands[1], mask)); | |
6012 | emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); | |
6013 | DONE; | |
6014 | }) | |
6015 | ||
b7d3a6a6 MM |
6016 | ;; Support for ISA 3.0 vector byte reverse |
6017 | ||
6018 | ;; Swap all bytes with in a vector | |
6019 | (define_insn "p9_xxbrq_v1ti" | |
6020 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") | |
6021 | (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))] | |
6022 | "TARGET_P9_VECTOR" | |
6023 | "xxbrq %x0,%x1" | |
6024 | [(set_attr "type" "vecperm")]) | |
6025 | ||
6026 | (define_expand "p9_xxbrq_v16qi" | |
6027 | [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa")) | |
d6126f8b | 6028 | (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))] |
b7d3a6a6 MM |
6029 | "TARGET_P9_VECTOR" |
6030 | { | |
d6126f8b | 6031 | rtx op0 = gen_reg_rtx (V1TImode); |
b7d3a6a6 MM |
6032 | rtx op1 = gen_lowpart (V1TImode, operands[1]); |
6033 | emit_insn (gen_p9_xxbrq_v1ti (op0, op1)); | |
d6126f8b | 6034 | emit_move_insn (operands[0], gen_lowpart (V16QImode, op0)); |
b7d3a6a6 MM |
6035 | DONE; |
6036 | }) | |
6037 | ||
6038 | ;; Swap all bytes in each 64-bit element | |
d6126f8b JJ |
6039 | (define_insn "p9_xxbrd_v2di" |
6040 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
6041 | (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))] | |
b7d3a6a6 MM |
6042 | "TARGET_P9_VECTOR" |
6043 | "xxbrd %x0,%x1" | |
6044 | [(set_attr "type" "vecperm")]) | |
6045 | ||
d6126f8b JJ |
6046 | (define_expand "p9_xxbrd_v2df" |
6047 | [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa")) | |
6048 | (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))] | |
6049 | "TARGET_P9_VECTOR" | |
6050 | { | |
6051 | rtx op0 = gen_reg_rtx (V2DImode); | |
6052 | rtx op1 = gen_lowpart (V2DImode, operands[1]); | |
6053 | emit_insn (gen_p9_xxbrd_v2di (op0, op1)); | |
6054 | emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0)); | |
6055 | DONE; | |
6056 | }) | |
6057 | ||
b7d3a6a6 | 6058 | ;; Swap all bytes in each 32-bit element |
d6126f8b JJ |
6059 | (define_insn "p9_xxbrw_v4si" |
6060 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") | |
6061 | (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))] | |
b7d3a6a6 MM |
6062 | "TARGET_P9_VECTOR" |
6063 | "xxbrw %x0,%x1" | |
6064 | [(set_attr "type" "vecperm")]) | |
6065 | ||
d6126f8b JJ |
6066 | (define_expand "p9_xxbrw_v4sf" |
6067 | [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa")) | |
6068 | (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))] | |
6069 | "TARGET_P9_VECTOR" | |
6070 | { | |
6071 | rtx op0 = gen_reg_rtx (V4SImode); | |
6072 | rtx op1 = gen_lowpart (V4SImode, operands[1]); | |
6073 | emit_insn (gen_p9_xxbrw_v4si (op0, op1)); | |
6074 | emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0)); | |
6075 | DONE; | |
6076 | }) | |
6077 | ||
fc504349 CL |
6078 | ;; Swap all bytes in each element of vector |
6079 | (define_expand "revb_<mode>" | |
d6126f8b JJ |
6080 | [(use (match_operand:VEC_REVB 0 "vsx_register_operand")) |
6081 | (use (match_operand:VEC_REVB 1 "vsx_register_operand"))] | |
fc504349 CL |
6082 | "" |
6083 | { | |
6084 | if (TARGET_P9_VECTOR) | |
6085 | emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1])); | |
6086 | else | |
6087 | { | |
6088 | /* Want to have the elements in reverse order relative | |
6089 | to the endian mode in use, i.e. in LE mode, put elements | |
6090 | in BE order. */ | |
6091 | rtx sel = swap_endian_selector_for_mode(<MODE>mode); | |
6092 | emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], | |
6093 | operands[1], sel)); | |
6094 | } | |
6095 | ||
6096 | DONE; | |
6097 | }) | |
6098 | ||
6099 | ;; Reversing bytes in vector char is just a NOP. | |
6100 | (define_expand "revb_v16qi" | |
6101 | [(set (match_operand:V16QI 0 "vsx_register_operand") | |
6102 | (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))] | |
6103 | "" | |
6104 | { | |
6105 | emit_move_insn (operands[0], operands[1]); | |
6106 | DONE; | |
6107 | }) | |
6108 | ||
b7d3a6a6 MM |
6109 | ;; Swap all bytes in each 16-bit element |
6110 | (define_insn "p9_xxbrh_v8hi" | |
6111 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
6112 | (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))] | |
6113 | "TARGET_P9_VECTOR" | |
6114 | "xxbrh %x0,%x1" | |
6115 | [(set_attr "type" "vecperm")]) | |
6116 | \f | |
fba4b861 MM |
6117 | |
6118 | ;; Operand numbers for the following peephole2 | |
6119 | (define_constants | |
6120 | [(SFBOOL_TMP_GPR 0) ;; GPR temporary | |
6121 | (SFBOOL_TMP_VSX 1) ;; vector temporary | |
6122 | (SFBOOL_MFVSR_D 2) ;; move to gpr dest | |
6123 | (SFBOOL_MFVSR_A 3) ;; move to gpr src | |
6124 | (SFBOOL_BOOL_D 4) ;; and/ior/xor dest | |
6125 | (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1 | |
6126 | (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg1 | |
6127 | (SFBOOL_SHL_D 7) ;; shift left dest | |
6128 | (SFBOOL_SHL_A 8) ;; shift left arg | |
6129 | (SFBOOL_MTVSR_D 9) ;; move to vecter dest | |
7a6ed74d MM |
6130 | (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode |
6131 | (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode | |
6132 | (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode | |
6133 | (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSRD_D as V4SFmode | |
fba4b861 MM |
6134 | |
6135 | ;; Attempt to optimize some common GLIBC operations using logical operations to | |
6136 | ;; pick apart SFmode operations. For example, there is code from e_powf.c | |
6137 | ;; after macro expansion that looks like: | |
6138 | ;; | |
6139 | ;; typedef union { | |
6140 | ;; float value; | |
6141 | ;; uint32_t word; | |
6142 | ;; } ieee_float_shape_type; | |
6143 | ;; | |
6144 | ;; float t1; | |
6145 | ;; int32_t is; | |
6146 | ;; | |
6147 | ;; do { | |
6148 | ;; ieee_float_shape_type gf_u; | |
6149 | ;; gf_u.value = (t1); | |
6150 | ;; (is) = gf_u.word; | |
6151 | ;; } while (0); | |
6152 | ;; | |
6153 | ;; do { | |
6154 | ;; ieee_float_shape_type sf_u; | |
6155 | ;; sf_u.word = (is & 0xfffff000); | |
6156 | ;; (t1) = sf_u.value; | |
6157 | ;; } while (0); | |
6158 | ;; | |
6159 | ;; | |
6160 | ;; This would result in two direct move operations (convert to memory format, | |
6161 | ;; direct move to GPR, do the AND operation, direct move to VSX, convert to | |
6162 | ;; scalar format). With this peephole, we eliminate the direct move to the | |
6163 | ;; GPR, and instead move the integer mask value to the vector register after a | |
6164 | ;; shift and do the VSX logical operation. | |
6165 | ||
6166 | ;; The insns for dealing with SFmode in GPR registers looks like: | |
6167 | ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN)) | |
6168 | ;; | |
6169 | ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX)) | |
6170 | ;; | |
7a6ed74d | 6171 | ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3))) |
fba4b861 | 6172 | ;; |
7a6ed74d | 6173 | ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32))) |
fba4b861 | 6174 | ;; |
7a6ed74d | 6175 | ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD)) |
fba4b861 | 6176 | ;; |
7a6ed74d | 6177 | ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN)) |
fba4b861 MM |
6178 | |
6179 | (define_peephole2 | |
6180 | [(match_scratch:DI SFBOOL_TMP_GPR "r") | |
6181 | (match_scratch:V4SF SFBOOL_TMP_VSX "wa") | |
6182 | ||
7a6ed74d | 6183 | ;; MFVSRWZ (aka zero_extend) |
fba4b861 | 6184 | (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand") |
7a6ed74d MM |
6185 | (zero_extend:DI |
6186 | (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand"))) | |
fba4b861 MM |
6187 | |
6188 | ;; AND/IOR/XOR operation on int | |
6189 | (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand") | |
6190 | (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand") | |
6191 | (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand"))) | |
6192 | ||
6193 | ;; SLDI | |
6194 | (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand") | |
6195 | (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand") | |
6196 | (const_int 32))) | |
6197 | ||
6198 | ;; MTVSRD | |
6199 | (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand") | |
6200 | (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))] | |
6201 | ||
6202 | "TARGET_POWERPC64 && TARGET_DIRECT_MOVE | |
6203 | /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO | |
6204 | to compare registers, when the mode is different. */ | |
6205 | && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D]) | |
6206 | && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D]) | |
6207 | && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D]) | |
6208 | && (REG_P (operands[SFBOOL_BOOL_A2]) | |
6209 | || CONST_INT_P (operands[SFBOOL_BOOL_A2])) | |
6210 | && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D]) | |
7a6ed74d | 6211 | || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D])) |
fba4b861 MM |
6212 | && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1]) |
6213 | || (REG_P (operands[SFBOOL_BOOL_A2]) | |
6214 | && REGNO (operands[SFBOOL_MFVSR_D]) | |
6215 | == REGNO (operands[SFBOOL_BOOL_A2]))) | |
6216 | && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A]) | |
6217 | && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D]) | |
7a6ed74d MM |
6218 | || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D])) |
6219 | && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])" | |
fba4b861 MM |
6220 | [(set (match_dup SFBOOL_TMP_GPR) |
6221 | (ashift:DI (match_dup SFBOOL_BOOL_A_DI) | |
6222 | (const_int 32))) | |
6223 | ||
6224 | (set (match_dup SFBOOL_TMP_VSX_DI) | |
6225 | (match_dup SFBOOL_TMP_GPR)) | |
6226 | ||
6227 | (set (match_dup SFBOOL_MTVSR_D_V4SF) | |
7a6ed74d | 6228 | (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF) |
fba4b861 MM |
6229 | (match_dup SFBOOL_TMP_VSX)))] |
6230 | { | |
6231 | rtx bool_a1 = operands[SFBOOL_BOOL_A1]; | |
6232 | rtx bool_a2 = operands[SFBOOL_BOOL_A2]; | |
6233 | int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]); | |
7a6ed74d | 6234 | int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]); |
fba4b861 MM |
6235 | int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]); |
6236 | int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]); | |
6237 | ||
6238 | if (CONST_INT_P (bool_a2)) | |
6239 | { | |
6240 | rtx tmp_gpr = operands[SFBOOL_TMP_GPR]; | |
6241 | emit_move_insn (tmp_gpr, bool_a2); | |
6242 | operands[SFBOOL_BOOL_A_DI] = tmp_gpr; | |
6243 | } | |
6244 | else | |
6245 | { | |
6246 | int regno_bool_a1 = REGNO (bool_a1); | |
6247 | int regno_bool_a2 = REGNO (bool_a2); | |
6248 | int regno_bool_a = (regno_mfvsr_d == regno_bool_a1 | |
6249 | ? regno_bool_a2 : regno_bool_a1); | |
6250 | operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a); | |
6251 | } | |
6252 | ||
7a6ed74d | 6253 | operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a); |
fba4b861 MM |
6254 | operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx); |
6255 | operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d); | |
6256 | }) | |
51df4136 KL |
6257 | |
6258 | ;; Support signed/unsigned long long to float conversion vectorization. | |
6259 | ;; Note that any_float (pc) here is just for code attribute <su>. | |
6260 | (define_expand "vec_pack<su>_float_v2di" | |
6261 | [(match_operand:V4SF 0 "vfloat_operand") | |
6262 | (match_operand:V2DI 1 "vint_operand") | |
6263 | (match_operand:V2DI 2 "vint_operand") | |
6264 | (any_float (pc))] | |
6265 | "TARGET_VSX" | |
6266 | { | |
6267 | rtx r1 = gen_reg_rtx (V4SFmode); | |
6268 | rtx r2 = gen_reg_rtx (V4SFmode); | |
6269 | emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1])); | |
6270 | emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2])); | |
6271 | rs6000_expand_extract_even (operands[0], r1, r2); | |
6272 | DONE; | |
6273 | }) | |
6274 | ||
6275 | ;; Support float to signed/unsigned long long conversion vectorization. | |
6276 | ;; Note that any_fix (pc) here is just for code attribute <su>. | |
6277 | (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf" | |
6278 | [(match_operand:V2DI 0 "vint_operand") | |
6279 | (match_operand:V4SF 1 "vfloat_operand") | |
6280 | (any_fix (pc))] | |
6281 | "TARGET_VSX" | |
6282 | { | |
6283 | rtx reg = gen_reg_rtx (V4SFmode); | |
6284 | rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); | |
6285 | emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg)); | |
6286 | DONE; | |
6287 | }) | |
6288 | ||
6289 | ;; Note that any_fix (pc) here is just for code attribute <su>. | |
6290 | (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf" | |
6291 | [(match_operand:V2DI 0 "vint_operand") | |
6292 | (match_operand:V4SF 1 "vfloat_operand") | |
6293 | (any_fix (pc))] | |
6294 | "TARGET_VSX" | |
6295 | { | |
6296 | rtx reg = gen_reg_rtx (V4SFmode); | |
6297 | rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); | |
6298 | emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg)); | |
6299 | DONE; | |
6300 | }) | |
6301 | ||
8ee2640b PB |
6302 | (define_insn "vsx_<xvcvbf16>" |
6303 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
6304 | (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")] | |
6305 | XVCVBF16))] | |
5d9d0c94 | 6306 | "TARGET_POWER10" |
8ee2640b PB |
6307 | "<xvcvbf16> %x0,%x1" |
6308 | [(set_attr "type" "vecfloat")]) | |
02ef74ba CL |
6309 | |
6310 | (define_insn "vec_mtvsrbmi" | |
6311 | [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") | |
6312 | (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")] | |
6313 | UNSPEC_MTVSBM))] | |
6314 | "TARGET_POWER10" | |
6315 | "mtvsrbmi %0,%1" | |
6316 | ) | |
6317 | ||
6318 | (define_insn "vec_mtvsr_<mode>" | |
6319 | [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v") | |
6320 | (unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")] | |
6321 | UNSPEC_MTVSBM))] | |
6322 | "TARGET_POWER10" | |
6323 | "mtvsr<wd>m %0,%1"; | |
6324 | [(set_attr "type" "vecsimple")]) | |
6325 | ||
6326 | (define_insn "vec_cntmb_<mode>" | |
6327 | [(set (match_operand:DI 0 "gpc_reg_operand" "=r") | |
6328 | (unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v") | |
6329 | (match_operand:QI 2 "const_0_to_1_operand" "n")] | |
6330 | UNSPEC_VCNTMB))] | |
6331 | "TARGET_POWER10" | |
03e93e1e | 6332 | "vcntmb<wd> %0,%1,%2" |
02ef74ba CL |
6333 | [(set_attr "type" "vecsimple")]) |
6334 | ||
6335 | (define_insn "vec_extract_<mode>" | |
6336 | [(set (match_operand:SI 0 "register_operand" "=r") | |
6337 | (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")] | |
6338 | UNSPEC_VEXTRACT))] | |
6339 | "TARGET_POWER10" | |
03e93e1e | 6340 | "vextract<wd>m %0,%1" |
02ef74ba CL |
6341 | [(set_attr "type" "vecsimple")]) |
6342 | ||
6343 | (define_insn "vec_expand_<mode>" | |
6344 | [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v") | |
6345 | (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")] | |
6346 | UNSPEC_VEXPAND))] | |
6347 | "TARGET_POWER10" | |
03e93e1e | 6348 | "vexpand<wd>m %0,%1" |
02ef74ba | 6349 | [(set_attr "type" "vecsimple")]) |
f1ad419e CL |
6350 | |
6351 | (define_insn "dives_<mode>" | |
6352 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6353 | (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v") | |
6354 | (match_operand:VIlong 2 "vsx_register_operand" "v")] | |
6355 | UNSPEC_VDIVES))] | |
6356 | "TARGET_POWER10" | |
6357 | "vdives<wd> %0,%1,%2" | |
6358 | [(set_attr "type" "vecdiv") | |
6359 | (set_attr "size" "<bits>")]) | |
6360 | ||
6361 | (define_insn "diveu_<mode>" | |
6362 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6363 | (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v") | |
6364 | (match_operand:VIlong 2 "vsx_register_operand" "v")] | |
6365 | UNSPEC_VDIVEU))] | |
6366 | "TARGET_POWER10" | |
6367 | "vdiveu<wd> %0,%1,%2" | |
6368 | [(set_attr "type" "vecdiv") | |
6369 | (set_attr "size" "<bits>")]) | |
6370 | ||
6371 | (define_insn "div<mode>3" | |
6372 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6373 | (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6374 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6375 | "TARGET_POWER10" | |
6376 | "vdivs<wd> %0,%1,%2" | |
6377 | [(set_attr "type" "vecdiv") | |
6378 | (set_attr "size" "<bits>")]) | |
6379 | ||
6380 | (define_insn "udiv<mode>3" | |
6381 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6382 | (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6383 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6384 | "TARGET_POWER10" | |
6385 | "vdivu<wd> %0,%1,%2" | |
6386 | [(set_attr "type" "vecdiv") | |
6387 | (set_attr "size" "<bits>")]) | |
6388 | ||
062c762e | 6389 | (define_insn "mod<mode>3" |
f1ad419e CL |
6390 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6391 | (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6392 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6393 | "TARGET_POWER10" | |
6394 | "vmods<wd> %0,%1,%2" | |
6395 | [(set_attr "type" "vecdiv") | |
6396 | (set_attr "size" "<bits>")]) | |
6397 | ||
062c762e | 6398 | (define_insn "umod<mode>3" |
f1ad419e CL |
6399 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6400 | (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6401 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6402 | "TARGET_POWER10" | |
6403 | "vmodu<wd> %0,%1,%2" | |
6404 | [(set_attr "type" "vecdiv") | |
6405 | (set_attr "size" "<bits>")]) | |
6406 | ||
1c0d49b9 | 6407 | (define_insn "smul<mode>3_highpart" |
f1ad419e CL |
6408 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6409 | (mult:VIlong (ashiftrt | |
6410 | (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6411 | (const_int 32)) | |
6412 | (ashiftrt | |
6413 | (match_operand:VIlong 2 "vsx_register_operand" "v") | |
6414 | (const_int 32))))] | |
6415 | "TARGET_POWER10" | |
6416 | "vmulhs<wd> %0,%1,%2" | |
6417 | [(set_attr "type" "veccomplex")]) | |
6418 | ||
1c0d49b9 | 6419 | (define_insn "umul<mode>3_highpart" |
f1ad419e CL |
6420 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6421 | (us_mult:VIlong (ashiftrt | |
6422 | (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6423 | (const_int 32)) | |
6424 | (ashiftrt | |
6425 | (match_operand:VIlong 2 "vsx_register_operand" "v") | |
6426 | (const_int 32))))] | |
6427 | "TARGET_POWER10" | |
6428 | "vmulhu<wd> %0,%1,%2" | |
6429 | [(set_attr "type" "veccomplex")]) | |
6430 | ||
6431 | ;; Vector multiply low double word | |
6432 | (define_insn "mulv2di3" | |
6433 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") | |
6434 | (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v") | |
6435 | (match_operand:V2DI 2 "vsx_register_operand" "v")))] | |
6436 | "TARGET_POWER10" | |
6437 | "vmulld %0,%1,%2" | |
6438 | [(set_attr "type" "veccomplex")]) | |
d2883be3 MM |
6439 | |
6440 | \f | |
6441 | ;; XXSPLTIW built-in function support | |
6442 | (define_insn "xxspltiw_v4si" | |
6443 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
6444 | (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")] | |
6445 | UNSPEC_XXSPLTIW))] | |
6446 | "TARGET_POWER10" | |
6447 | "xxspltiw %x0,%1" | |
5973dac9 | 6448 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6449 | (set_attr "prefixed" "yes")]) |
6450 | ||
6451 | (define_expand "xxspltiw_v4sf" | |
6452 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6453 | (unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")] | |
6454 | UNSPEC_XXSPLTIW))] | |
6455 | "TARGET_POWER10" | |
6456 | { | |
6457 | long value = rs6000_const_f32_to_i32 (operands[1]); | |
6458 | emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value))); | |
6459 | DONE; | |
6460 | }) | |
6461 | ||
6462 | (define_insn "xxspltiw_v4sf_inst" | |
6463 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6464 | (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")] | |
6465 | UNSPEC_XXSPLTIW))] | |
6466 | "TARGET_POWER10" | |
6467 | "xxspltiw %x0,%1" | |
5973dac9 | 6468 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6469 | (set_attr "prefixed" "yes")]) |
6470 | ||
6471 | ;; XXSPLTIDP built-in function support | |
6472 | (define_expand "xxspltidp_v2df" | |
6473 | [(set (match_operand:V2DF 0 "register_operand" ) | |
6474 | (unspec:V2DF [(match_operand:SF 1 "const_double_operand")] | |
5973dac9 | 6475 | UNSPEC_XXSPLTIDP))] |
d2883be3 MM |
6476 | "TARGET_POWER10" |
6477 | { | |
6478 | long value = rs6000_const_f32_to_i32 (operands[1]); | |
6479 | rs6000_emit_xxspltidp_v2df (operands[0], value); | |
6480 | DONE; | |
6481 | }) | |
6482 | ||
949c7a09 MM |
6483 | (define_mode_iterator XXSPLTIDP [SF DF V2DF]) |
6484 | ||
6485 | (define_insn "xxspltidp_<mode>_inst" | |
6486 | [(set (match_operand:XXSPLTIDP 0 "register_operand" "=wa") | |
6487 | (unspec:XXSPLTIDP [(match_operand:SI 1 "c32bit_cint_operand" "n")] | |
6488 | UNSPEC_XXSPLTIDP))] | |
d2883be3 MM |
6489 | "TARGET_POWER10" |
6490 | "xxspltidp %x0,%1" | |
5973dac9 | 6491 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6492 | (set_attr "prefixed" "yes")]) |
6493 | ||
949c7a09 MM |
6494 | ;; Generate the XXSPLTIDP instruction to support SFmode and DFmode scalar |
6495 | ;; constants and V2DF vector constants where both elements are the same. The | |
6496 | ;; constant has to be expressible as a SFmode constant that is not a SFmode | |
6497 | ;; denormal value. | |
6498 | (define_insn_and_split "*xxspltidp_<mode>_internal" | |
6499 | [(set (match_operand:XXSPLTIDP 0 "vsx_register_operand" "=wa") | |
6500 | (match_operand:XXSPLTIDP 1 "xxspltidp_operand" "eF"))] | |
6501 | "TARGET_POWER10" | |
6502 | "#" | |
6503 | "&& 1" | |
6504 | [(set (match_operand:XXSPLTIDP 0 "vsx_register_operand") | |
6505 | (unspec:XXSPLTIDP [(match_dup 2)] UNSPEC_XXSPLTIDP))] | |
6506 | { | |
6507 | HOST_WIDE_INT value = 0; | |
6508 | ||
6509 | if (!xxspltidp_constant_p (operands[1], <MODE>mode, &value)) | |
6510 | gcc_unreachable (); | |
6511 | ||
6512 | operands[2] = GEN_INT (value); | |
6513 | } | |
6514 | [(set_attr "type" "vecperm") | |
6515 | (set_attr "prefixed" "yes")]) | |
6516 | ||
d2883be3 MM |
6517 | ;; XXSPLTI32DX built-in function support |
6518 | (define_expand "xxsplti32dx_v4si" | |
6519 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
6520 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
6521 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6522 | (match_operand:SI 3 "s32bit_cint_operand" "n")] | |
6523 | UNSPEC_XXSPLTI32DX))] | |
6524 | "TARGET_POWER10" | |
6525 | { | |
6526 | int index = INTVAL (operands[2]); | |
6527 | ||
6528 | if (!BYTES_BIG_ENDIAN) | |
6529 | index = 1 - index; | |
6530 | ||
6531 | emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], operands[1], | |
6532 | GEN_INT (index), operands[3])); | |
6533 | DONE; | |
6534 | } | |
5973dac9 | 6535 | [(set_attr "type" "vecperm")]) |
d2883be3 MM |
6536 | |
6537 | (define_insn "xxsplti32dx_v4si_inst" | |
6538 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
6539 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
6540 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6541 | (match_operand:SI 3 "s32bit_cint_operand" "n")] | |
6542 | UNSPEC_XXSPLTI32DX))] | |
6543 | "TARGET_POWER10" | |
6544 | "xxsplti32dx %x0,%2,%3" | |
5973dac9 | 6545 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6546 | (set_attr "prefixed" "yes")]) |
6547 | ||
6548 | (define_expand "xxsplti32dx_v4sf" | |
6549 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6550 | (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") | |
6551 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6552 | (match_operand:SF 3 "const_double_operand" "n")] | |
6553 | UNSPEC_XXSPLTI32DX))] | |
6554 | "TARGET_POWER10" | |
6555 | { | |
6556 | int index = INTVAL (operands[2]); | |
6557 | long value = rs6000_const_f32_to_i32 (operands[3]); | |
6558 | if (!BYTES_BIG_ENDIAN) | |
6559 | index = 1 - index; | |
6560 | ||
6561 | emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], operands[1], | |
6562 | GEN_INT (index), GEN_INT (value))); | |
6563 | DONE; | |
6564 | }) | |
6565 | ||
6566 | (define_insn "xxsplti32dx_v4sf_inst" | |
6567 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6568 | (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") | |
6569 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6570 | (match_operand:SI 3 "s32bit_cint_operand" "n")] | |
6571 | UNSPEC_XXSPLTI32DX))] | |
6572 | "TARGET_POWER10" | |
6573 | "xxsplti32dx %x0,%2,%3" | |
5973dac9 | 6574 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6575 | (set_attr "prefixed" "yes")]) |
6576 | ||
6577 | ;; XXBLEND built-in function support | |
6578 | (define_insn "xxblend_<mode>" | |
6579 | [(set (match_operand:VM3 0 "register_operand" "=wa") | |
6580 | (unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa") | |
6581 | (match_operand:VM3 2 "register_operand" "wa") | |
6582 | (match_operand:VM3 3 "register_operand" "wa")] | |
6583 | UNSPEC_XXBLEND))] | |
6584 | "TARGET_POWER10" | |
6585 | "xxblendv<VM3_char> %x0,%x1,%x2,%x3" | |
5973dac9 | 6586 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6587 | (set_attr "prefixed" "yes")]) |
6588 | ||
6589 | ;; XXPERMX built-in function support | |
6590 | (define_expand "xxpermx" | |
6591 | [(set (match_operand:V2DI 0 "register_operand" "+wa") | |
6592 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa") | |
6593 | (match_operand:V2DI 2 "register_operand" "wa") | |
6594 | (match_operand:V16QI 3 "register_operand" "wa") | |
6595 | (match_operand:QI 4 "u8bit_cint_operand" "n")] | |
6596 | UNSPEC_XXPERMX))] | |
6597 | "TARGET_POWER10" | |
6598 | { | |
6599 | if (BYTES_BIG_ENDIAN) | |
6600 | emit_insn (gen_xxpermx_inst (operands[0], operands[1], | |
6601 | operands[2], operands[3], | |
6602 | operands[4])); | |
6603 | else | |
6604 | { | |
6605 | /* Reverse value of byte element indexes by XORing with 0xFF. | |
6606 | Reverse the 32-byte section identifier match by subracting bits [0:2] | |
6607 | of elemet from 7. */ | |
6608 | int value = INTVAL (operands[4]); | |
6609 | rtx vreg = gen_reg_rtx (V16QImode); | |
6610 | ||
6611 | emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1))); | |
6612 | emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg)); | |
6613 | value = 7 - value; | |
6614 | emit_insn (gen_xxpermx_inst (operands[0], operands[2], | |
6615 | operands[1], operands[3], | |
6616 | GEN_INT (value))); | |
6617 | } | |
6618 | ||
6619 | DONE; | |
6620 | } | |
5973dac9 | 6621 | [(set_attr "type" "vecperm")]) |
d2883be3 MM |
6622 | |
6623 | (define_insn "xxpermx_inst" | |
6624 | [(set (match_operand:V2DI 0 "register_operand" "+v") | |
6625 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v") | |
6626 | (match_operand:V2DI 2 "register_operand" "v") | |
6627 | (match_operand:V16QI 3 "register_operand" "v") | |
6628 | (match_operand:QI 4 "u3bit_cint_operand" "n")] | |
6629 | UNSPEC_XXPERMX))] | |
6630 | "TARGET_POWER10" | |
6631 | "xxpermx %x0,%x1,%x2,%x3,%4" | |
5973dac9 | 6632 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6633 | (set_attr "prefixed" "yes")]) |
6634 | ||
6635 | ;; XXEVAL built-in function support | |
6636 | (define_insn "xxeval" | |
6637 | [(set (match_operand:V2DI 0 "register_operand" "=wa") | |
6638 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa") | |
6639 | (match_operand:V2DI 2 "register_operand" "wa") | |
6640 | (match_operand:V2DI 3 "register_operand" "wa") | |
6641 | (match_operand:QI 4 "u8bit_cint_operand" "n")] | |
6642 | UNSPEC_XXEVAL))] | |
6643 | "TARGET_POWER10" | |
6644 | "xxeval %0,%1,%2,%3,%4" | |
5973dac9 | 6645 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6646 | (set_attr "prefixed" "yes")]) |
6647 |