]>
Commit | Line | Data |
---|---|---|
43cacb12 | 1 | ;; Machine description for AArch64 SVE. |
a5544970 | 2 | ;; Copyright (C) 2009-2019 Free Software Foundation, Inc. |
43cacb12 RS |
3 | ;; Contributed by ARM Ltd. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 3, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
915d28fe RS |
21 | ;; The file is organised into the following sections (search for the full |
22 | ;; line): | |
23 | ;; | |
24 | ;; == General notes | |
25 | ;; ---- Note on the handling of big-endian SVE | |
34467289 | 26 | ;; ---- Description of UNSPEC_PTEST |
00fa90d9 | 27 | ;; ---- Description of UNSPEC_PRED_Z |
06308276 | 28 | ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X |
c9c5a809 | 29 | ;; ---- Note on predicated FP arithmetic patterns and GP "strictness" |
915d28fe RS |
30 | ;; |
31 | ;; == Moves | |
32 | ;; ---- Moves of single vectors | |
33 | ;; ---- Moves of multiple vectors | |
34 | ;; ---- Moves of predicates | |
35 | ;; | |
36 | ;; == Loads | |
37 | ;; ---- Normal contiguous loads | |
38 | ;; ---- Normal gather loads | |
39 | ;; | |
40 | ;; == Stores | |
41 | ;; ---- Normal contiguous stores | |
42 | ;; ---- Normal scatter stores | |
43 | ;; | |
44 | ;; == Vector creation | |
45 | ;; ---- [INT,FP] Duplicate element | |
46 | ;; ---- [INT,FP] Initialize from individual elements | |
47 | ;; ---- [INT] Linear series | |
48 | ;; ---- [PRED] Duplicate element | |
49 | ;; | |
50 | ;; == Vector decomposition | |
51 | ;; ---- [INT,FP] Extract index | |
52 | ;; ---- [INT,FP] Extract active element | |
53 | ;; ---- [PRED] Extract index | |
54 | ;; | |
55 | ;; == Unary arithmetic | |
56 | ;; ---- [INT] General unary arithmetic corresponding to rtx codes | |
d7a09c44 | 57 | ;; ---- [INT] General unary arithmetic corresponding to unspecs |
d113ece6 | 58 | ;; ---- [INT] Zero extension |
e0a0be93 | 59 | ;; ---- [INT] Logical inverse |
d45b20a5 | 60 | ;; ---- [FP] General unary arithmetic corresponding to unspecs |
915d28fe RS |
61 | ;; ---- [PRED] Inverse |
62 | ||
63 | ;; == Binary arithmetic | |
64 | ;; ---- [INT] General binary arithmetic corresponding to rtx codes | |
65 | ;; ---- [INT] Addition | |
66 | ;; ---- [INT] Subtraction | |
a229966c | 67 | ;; ---- [INT] Take address |
915d28fe | 68 | ;; ---- [INT] Absolute difference |
915d28fe RS |
69 | ;; ---- [INT] Highpart multiplication |
70 | ;; ---- [INT] Division | |
71 | ;; ---- [INT] Binary logical operations | |
72 | ;; ---- [INT] Binary logical operations (inverted second input) | |
73 | ;; ---- [INT] Shifts | |
915d28fe RS |
74 | ;; ---- [FP] General binary arithmetic corresponding to rtx codes |
75 | ;; ---- [FP] General binary arithmetic corresponding to unspecs | |
76 | ;; ---- [FP] Addition | |
77 | ;; ---- [FP] Subtraction | |
78 | ;; ---- [FP] Absolute difference | |
79 | ;; ---- [FP] Multiplication | |
915d28fe RS |
80 | ;; ---- [FP] Binary logical operations |
81 | ;; ---- [FP] Sign copying | |
82 | ;; ---- [FP] Maximum and minimum | |
83 | ;; ---- [PRED] Binary logical operations | |
84 | ;; ---- [PRED] Binary logical operations (inverted second input) | |
85 | ;; ---- [PRED] Binary logical operations (inverted result) | |
86 | ;; | |
87 | ;; == Ternary arithmetic | |
88 | ;; ---- [INT] MLA and MAD | |
89 | ;; ---- [INT] MLS and MSB | |
90 | ;; ---- [INT] Dot product | |
91 | ;; ---- [INT] Sum of absolute differences | |
92 | ;; ---- [FP] General ternary arithmetic corresponding to unspecs | |
915d28fe RS |
93 | ;; |
94 | ;; == Comparisons and selects | |
95 | ;; ---- [INT,FP] Select based on predicates | |
96 | ;; ---- [INT,FP] Compare and select | |
97 | ;; ---- [INT] Comparisons | |
98 | ;; ---- [INT] While tests | |
42b4e87d RS |
99 | ;; ---- [FP] Direct comparisons |
100 | ;; ---- [FP] Absolute comparisons | |
915d28fe RS |
101 | ;; ---- [PRED] Test bits |
102 | ;; | |
103 | ;; == Reductions | |
104 | ;; ---- [INT,FP] Conditional reductions | |
105 | ;; ---- [INT] Tree reductions | |
106 | ;; ---- [FP] Tree reductions | |
107 | ;; ---- [FP] Left-to-right reductions | |
108 | ;; | |
109 | ;; == Permutes | |
110 | ;; ---- [INT,FP] General permutes | |
111 | ;; ---- [INT,FP] Special-purpose unary permutes | |
112 | ;; ---- [INT,FP] Special-purpose binary permutes | |
113 | ;; ---- [PRED] Special-purpose binary permutes | |
114 | ;; | |
115 | ;; == Conversions | |
116 | ;; ---- [INT<-INT] Packs | |
117 | ;; ---- [INT<-INT] Unpacks | |
118 | ;; ---- [INT<-FP] Conversions | |
119 | ;; ---- [INT<-FP] Packs | |
120 | ;; ---- [INT<-FP] Unpacks | |
121 | ;; ---- [FP<-INT] Conversions | |
122 | ;; ---- [FP<-INT] Packs | |
123 | ;; ---- [FP<-INT] Unpacks | |
124 | ;; ---- [FP<-FP] Packs | |
125 | ;; ---- [FP<-FP] Unpacks | |
126 | ;; ---- [PRED<-PRED] Packs | |
127 | ;; ---- [PRED<-PRED] Unpacks | |
128 | ||
129 | ;; ========================================================================= | |
130 | ;; == General notes | |
131 | ;; ========================================================================= | |
132 | ;; | |
133 | ;; ------------------------------------------------------------------------- | |
134 | ;; ---- Note on the handling of big-endian SVE | |
135 | ;; ------------------------------------------------------------------------- | |
43cacb12 RS |
136 | ;; |
137 | ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the | |
138 | ;; same way as movdi or movti would: the first byte of memory goes | |
139 | ;; into the most significant byte of the register and the last byte | |
140 | ;; of memory goes into the least significant byte of the register. | |
141 | ;; This is the most natural ordering for Advanced SIMD and matches | |
142 | ;; the ABI layout for 64-bit and 128-bit vector types. | |
143 | ;; | |
144 | ;; As a result, the order of bytes within the register is what GCC | |
145 | ;; expects for a big-endian target, and subreg offsets therefore work | |
146 | ;; as expected, with the first element in memory having subreg offset 0 | |
147 | ;; and the last element in memory having the subreg offset associated | |
148 | ;; with a big-endian lowpart. However, this ordering also means that | |
149 | ;; GCC's lane numbering does not match the architecture's numbering: | |
150 | ;; GCC always treats the element at the lowest address in memory | |
151 | ;; (subreg offset 0) as element 0, while the architecture treats | |
152 | ;; the least significant end of the register as element 0. | |
153 | ;; | |
154 | ;; The situation for SVE is different. We want the layout of the | |
155 | ;; SVE register to be the same for mov<mode> as it is for maskload<mode>: | |
156 | ;; logically, a mov<mode> load must be indistinguishable from a | |
157 | ;; maskload<mode> whose mask is all true. We therefore need the | |
158 | ;; register layout to match LD1 rather than LDR. The ABI layout of | |
159 | ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering. | |
160 | ;; | |
161 | ;; As a result, the architecture lane numbering matches GCC's lane | |
162 | ;; numbering, with element 0 always being the first in memory. | |
163 | ;; However: | |
164 | ;; | |
165 | ;; - Applying a subreg offset to a register does not give the element | |
166 | ;; that GCC expects: the first element in memory has the subreg offset | |
167 | ;; associated with a big-endian lowpart while the last element in memory | |
168 | ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS. | |
169 | ;; | |
170 | ;; - We cannot use LDR and STR for spill slots that might be accessed | |
171 | ;; via subregs, since although the elements have the order GCC expects, | |
172 | ;; the order of the bytes within the elements is different. We instead | |
173 | ;; access spill slots via LD1 and ST1, using secondary reloads to | |
174 | ;; reserve a predicate register. | |
34467289 RS |
175 | ;; |
176 | ;; ------------------------------------------------------------------------- | |
177 | ;; ---- Description of UNSPEC_PTEST | |
178 | ;; ------------------------------------------------------------------------- | |
179 | ;; | |
180 | ;; SVE provides a PTEST instruction for testing the active lanes of a | |
181 | ;; predicate and setting the flags based on the result. The associated | |
182 | ;; condition code tests are: | |
183 | ;; | |
184 | ;; - any (= ne): at least one active bit is set | |
185 | ;; - none (= eq): all active bits are clear (*) | |
186 | ;; - first (= mi): the first active bit is set | |
187 | ;; - nfrst (= pl): the first active bit is clear (*) | |
188 | ;; - last (= cc): the last active bit is set | |
189 | ;; - nlast (= cs): the last active bit is clear (*) | |
190 | ;; | |
191 | ;; where the conditions marked (*) are also true when there are no active | |
192 | ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results | |
193 | ;; of a PTEST use the condition code mode CC_NZC. | |
194 | ;; | |
195 | ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). | |
196 | ;; This means that for other predicate modes, we need a governing predicate | |
197 | ;; in which all bits are defined. | |
198 | ;; | |
199 | ;; For example, most predicated .H operations ignore the odd bits of the | |
200 | ;; governing predicate, so that an active lane is represented by the | |
201 | ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be | |
202 | ;; any value. To test a .H predicate, we instead need "10" and "00" | |
203 | ;; respectively, so that the condition only tests the even bits of the | |
204 | ;; predicate. | |
205 | ;; | |
206 | ;; Several instructions set the flags as a side-effect, in the same way | |
207 | ;; that a separate PTEST would. It's important for code quality that we | |
208 | ;; use these flags results as often as possible, particularly in the case | |
209 | ;; of WHILE* and RDFFR. | |
210 | ;; | |
211 | ;; Also, some of the instructions that set the flags are unpredicated | |
212 | ;; and instead implicitly test all .B, .H, .S or .D elements, as though | |
213 | ;; they were predicated on a PTRUE of that size. For example, a .S | |
214 | ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE | |
215 | ;; would. | |
216 | ;; | |
217 | ;; We therefore need to represent PTEST operations in a way that | |
218 | ;; makes it easy to combine them with both predicated and unpredicated | |
219 | ;; operations, while using a VNx16BI governing predicate for all | |
220 | ;; predicate modes. We do this using: | |
221 | ;; | |
222 | ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) | |
223 | ;; | |
224 | ;; where: | |
225 | ;; | |
226 | ;; - GP is the real VNx16BI governing predicate | |
227 | ;; | |
228 | ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting | |
229 | ;; GP to CAST_GP are guaranteed to be clear in GP. | |
230 | ;; | |
231 | ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value | |
232 | ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and | |
233 | ;; SVE_MAYBE_NOT_PTRUE otherwise. | |
234 | ;; | |
235 | ;; - OP is the predicate we want to test, of the same mode as CAST_GP. | |
c9c5a809 RS |
236 | ;; |
237 | ;; ------------------------------------------------------------------------- | |
00fa90d9 RS |
238 | ;; ---- Description of UNSPEC_PRED_Z |
239 | ;; ------------------------------------------------------------------------- | |
240 | ;; | |
241 | ;; SVE integer comparisons are predicated and return zero for inactive | |
242 | ;; lanes. Sometimes we use them with predicates that are all-true and | |
243 | ;; sometimes we use them with general predicates. | |
244 | ;; | |
245 | ;; The integer comparisons also set the flags and so build in the effect | |
246 | ;; of a PTEST. We therefore want to be able to combine integer comparison | |
247 | ;; patterns with PTESTs of the result. One difficulty with doing this is | |
248 | ;; that (as noted above) the PTEST is always a .B operation and so can place | |
249 | ;; stronger requirements on the governing predicate than the comparison does. | |
250 | ;; | |
251 | ;; For example, when applying a separate PTEST to the result of a full-vector | |
252 | ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a | |
253 | ;; .B PTRUE. In contrast, the comparison might be predicated on either | |
254 | ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate | |
255 | ;; bits don't matter for .H operations. | |
256 | ;; | |
257 | ;; We therefore can't rely on a full-vector comparison using the same | |
258 | ;; predicate register as a following PTEST. We instead need to remember | |
259 | ;; whether a comparison is known to be a full-vector comparison and use | |
260 | ;; this information in addition to a check for equal predicate registers. | |
261 | ;; At the same time, it's useful to have a common representation for all | |
262 | ;; integer comparisons, so that they can be handled by a single set of | |
263 | ;; patterns. | |
264 | ;; | |
265 | ;; We therefore take a similar approach to UNSPEC_PTEST above and use: | |
266 | ;; | |
267 | ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z) | |
268 | ;; | |
269 | ;; where: | |
270 | ;; | |
271 | ;; - GP is the governing predicate, of mode <M:VPRED> | |
272 | ;; | |
273 | ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value | |
274 | ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE | |
275 | ;; otherwise | |
276 | ;; | |
277 | ;; - CODE is the comparison code | |
278 | ;; | |
279 | ;; - OP0 and OP1 are the values being compared, of mode M | |
280 | ;; | |
281 | ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero. | |
282 | ;; | |
283 | ;; ------------------------------------------------------------------------- | |
06308276 RS |
284 | ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X |
285 | ;; ------------------------------------------------------------------------- | |
286 | ;; | |
287 | ;; Many SVE integer operations are predicated. We can generate them | |
288 | ;; from four sources: | |
289 | ;; | |
290 | ;; (1) Using normal unpredicated optabs. In this case we need to create | |
291 | ;; an all-true predicate register to act as the governing predicate | |
292 | ;; for the SVE instruction. There are no inactive lanes, and thus | |
293 | ;; the values of inactive lanes don't matter. | |
294 | ;; | |
295 | ;; (2) Using _x ACLE functions. In this case the function provides a | |
296 | ;; specific predicate and some lanes might be inactive. However, | |
297 | ;; as for (1), the values of the inactive lanes don't matter. | |
298 | ;; We can make extra lanes active without changing the behavior | |
299 | ;; (although for code-quality reasons we should avoid doing so | |
300 | ;; needlessly). | |
301 | ;; | |
302 | ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. | |
303 | ;; These optabs have a predicate operand that specifies which lanes are | |
304 | ;; active and another operand that provides the values of inactive lanes. | |
305 | ;; | |
306 | ;; (4) Using _m and _z ACLE functions. These functions map to the same | |
307 | ;; patterns as (3), with the _z functions setting inactive lanes to zero | |
308 | ;; and the _m functions setting the inactive lanes to one of the function | |
309 | ;; arguments. | |
310 | ;; | |
311 | ;; For (1) and (2) we need a way of attaching the predicate to a normal | |
312 | ;; unpredicated integer operation. We do this using: | |
313 | ;; | |
314 | ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X) | |
315 | ;; | |
316 | ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED | |
317 | ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE; | |
318 | ;; it always is for (1), but might not be for (2). | |
319 | ;; | |
320 | ;; The unspec as a whole has the same value as (code:M ...) when PRED is | |
321 | ;; all-true. It is always semantically valid to replace PRED with a PTRUE, | |
322 | ;; but as noted above, we should only do so if there's a specific benefit. | |
323 | ;; | |
324 | ;; (The "_X" in the unspec is named after the ACLE functions in (2).) | |
325 | ;; | |
326 | ;; For (3) and (4) we can simply use the SVE port's normal representation | |
327 | ;; of a predicate-based select: | |
328 | ;; | |
329 | ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL) | |
330 | ;; | |
331 | ;; where INACTIVE specifies the values of inactive lanes. | |
332 | ;; | |
333 | ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather | |
334 | ;; than inserting the integer operation directly. This is mostly useful | |
335 | ;; if we want the combine pass to merge an integer operation with an explicit | |
336 | ;; vcond_mask (in other words, with a following SEL instruction). However, | |
337 | ;; it's generally better to merge such operations at the gimple level | |
338 | ;; using (3). | |
339 | ;; | |
340 | ;; ------------------------------------------------------------------------- | |
c9c5a809 RS |
341 | ;; ---- Note on predicated FP arithmetic patterns and GP "strictness" |
342 | ;; ------------------------------------------------------------------------- | |
343 | ;; | |
344 | ;; Most SVE floating-point operations are predicated. We can generate | |
345 | ;; them from four sources: | |
346 | ;; | |
347 | ;; (1) Using normal unpredicated optabs. In this case we need to create | |
348 | ;; an all-true predicate register to act as the governing predicate | |
349 | ;; for the SVE instruction. There are no inactive lanes, and thus | |
350 | ;; the values of inactive lanes don't matter. | |
351 | ;; | |
352 | ;; (2) Using _x ACLE functions. In this case the function provides a | |
353 | ;; specific predicate and some lanes might be inactive. However, | |
354 | ;; as for (1), the values of the inactive lanes don't matter. | |
355 | ;; | |
356 | ;; The instruction must have the same exception behavior as the | |
357 | ;; function call unless things like command-line flags specifically | |
358 | ;; allow otherwise. For example, with -ffast-math, it is OK to | |
359 | ;; raise exceptions for inactive lanes, but normally it isn't. | |
360 | ;; | |
361 | ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. | |
362 | ;; These optabs have a predicate operand that specifies which lanes are | |
363 | ;; active and another operand that provides the values of inactive lanes. | |
364 | ;; | |
365 | ;; (4) Using _m and _z ACLE functions. These functions map to the same | |
366 | ;; patterns as (3), with the _z functions setting inactive lanes to zero | |
367 | ;; and the _m functions setting the inactive lanes to one of the function | |
368 | ;; arguments. | |
369 | ;; | |
370 | ;; So: | |
371 | ;; | |
372 | ;; - In (1), the predicate is known to be all true and the pattern can use | |
373 | ;; unpredicated operations where available. | |
374 | ;; | |
375 | ;; - In (2), the predicate might or might not be all true. The pattern can | |
376 | ;; use unpredicated instructions if the predicate is all-true or if things | |
377 | ;; like command-line flags allow exceptions for inactive lanes. | |
378 | ;; | |
379 | ;; - (3) and (4) represent a native SVE predicated operation. Some lanes | |
380 | ;; might be inactive and inactive lanes of the result must have specific | |
381 | ;; values. There is no scope for using unpredicated instructions (and no | |
382 | ;; reason to want to), so the question about command-line flags doesn't | |
383 | ;; arise. | |
384 | ;; | |
385 | ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...) | |
386 | ;; in combination with a separate predicate operand, e.g. | |
387 | ;; | |
388 | ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
389 | ;; (sqrt:SVE_F 2 "register_operand" "w")] | |
390 | ;; ....) | |
391 | ;; | |
392 | ;; because (sqrt ...) can raise an exception for any lane, including | |
393 | ;; inactive ones. We therefore need to use an unspec instead. | |
394 | ;; | |
395 | ;; Also, (2) requires some way of distinguishing the case in which the | |
396 | ;; predicate might have inactive lanes and cannot be changed from the | |
397 | ;; case in which the predicate has no inactive lanes or can be changed. | |
398 | ;; This information is also useful when matching combined FP patterns | |
399 | ;; in which the predicates might not be equal. | |
400 | ;; | |
401 | ;; We therefore model FP operations as an unspec of the form: | |
402 | ;; | |
403 | ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>) | |
404 | ;; | |
405 | ;; where: | |
406 | ;; | |
407 | ;; - PRED is the governing predicate. | |
408 | ;; | |
409 | ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the | |
410 | ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those | |
411 | ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise. | |
412 | ;; | |
413 | ;; - OP0 OP1 ... are the normal input operands to the operation. | |
414 | ;; | |
415 | ;; - MNEMONIC is the mnemonic of the associated SVE instruction. | |
43cacb12 | 416 | |
915d28fe RS |
417 | ;; ========================================================================= |
418 | ;; == Moves | |
419 | ;; ========================================================================= | |
420 | ||
421 | ;; ------------------------------------------------------------------------- | |
422 | ;; ---- Moves of single vectors | |
423 | ;; ------------------------------------------------------------------------- | |
424 | ;; Includes: | |
425 | ;; - MOV (including aliases) | |
426 | ;; - LD1B (contiguous form) | |
427 | ;; - LD1D ( " " ) | |
428 | ;; - LD1H ( " " ) | |
429 | ;; - LD1W ( " " ) | |
430 | ;; - LDR | |
431 | ;; - ST1B (contiguous form) | |
432 | ;; - ST1D ( " " ) | |
433 | ;; - ST1H ( " " ) | |
434 | ;; - ST1W ( " " ) | |
435 | ;; - STR | |
436 | ;; ------------------------------------------------------------------------- | |
437 | ||
43cacb12 RS |
438 | (define_expand "mov<mode>" |
439 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
440 | (match_operand:SVE_ALL 1 "general_operand"))] | |
441 | "TARGET_SVE" | |
442 | { /* Try three special cases in turn; if none of them applies, the plain SET above is emitted. */ | |
443 | /* Use the predicated load and store patterns where possible. | |
444 | This is required for big-endian targets (see "Note on the handling | |
445 | of big-endian SVE" at the head of the file) and increases the | |
446 | addressing choices for little-endian. */ | |
447 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
ea403d8b | 448 | && can_create_pseudo_p ()) |
43cacb12 RS |
449 | { |
450 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
451 | DONE; | |
452 | } | |
453 | ||
454 | if (CONSTANT_P (operands[1])) /* Constant sources are handed to aarch64_expand_mov_immediate. */ | |
455 | { | |
4aeb1ba7 | 456 | aarch64_expand_mov_immediate (operands[0], operands[1]); |
43cacb12 RS |
457 | DONE; |
458 | } | |
002092be RS |
459 |
460 | /* Optimize subregs on big-endian targets: we can use REV[BHW] | |
461 | instead of going through memory. */ | |
462 | if (BYTES_BIG_ENDIAN | |
ea403d8b | 463 | && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) |
002092be RS |
464 | DONE; |
465 | } | |
466 | ) | |
467 | ||
915d28fe RS |
468 | (define_expand "movmisalign<mode>" |
469 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
470 | (match_operand:SVE_ALL 1 "general_operand"))] | |
471 | "TARGET_SVE" | |
002092be | 472 | { |
915d28fe RS |
473 | /* Equivalent to a normal move for our purposes, so just emit one with the same operands. */ |
474 | emit_move_insn (operands[0], operands[1]); | |
002092be | 475 | DONE; |
43cacb12 RS |
476 | } |
477 | ) | |
478 | ||
479 | ;; Unpredicated moves (little-endian). Only allow memory operations | |
480 | ;; during and after register allocation (RA); before RA we want the predicated load and | |
481 | ;; store patterns to be used instead. The alternatives are, in order: LDR (load), STR (store), register-to-register MOV, and a move whose assembly is produced by aarch64_output_sve_mov_immediate (the "Dn" alternative). | |
482 | (define_insn "*aarch64_sve_mov<mode>_le" | |
483 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
484 | (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
485 | "TARGET_SVE | |
486 | && !BYTES_BIG_ENDIAN | |
487 | && ((lra_in_progress || reload_completed) | |
488 | || (register_operand (operands[0], <MODE>mode) | |
489 | && nonmemory_operand (operands[1], <MODE>mode)))" | |
490 | "@ | |
491 | ldr\t%0, %1 | |
492 | str\t%1, %0 | |
493 | mov\t%0.d, %1.d | |
494 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
495 | ) | |
496 | ||
497 | ;; Unpredicated moves (big-endian). Memory accesses require secondary | |
498 | ;; reloads, so this pattern only accepts a register destination and a non-memory source: either a register-to-register MOV or a move whose assembly is produced by aarch64_output_sve_mov_immediate. | |
499 | (define_insn "*aarch64_sve_mov<mode>_be" | |
500 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
501 | (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
502 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
503 | "@ | |
504 | mov\t%0.d, %1.d | |
505 | * return aarch64_output_sve_mov_immediate (operands[1]);" | |
506 | ) | |
507 | ||
508 | ;; Handle big-endian memory reloads. We use a byte PTRUE (VNx16BI) for all modes | |
509 | ;; to try to encourage reuse. Operand 2 is the clobbered predicate register that receives the PTRUE. | |
1bbffb87 | 510 | ;; This pattern needs constraints due to the TARGET_SECONDARY_RELOAD hook. |
43cacb12 RS |
511 | (define_expand "aarch64_sve_reload_be" |
512 | [(parallel | |
513 | [(set (match_operand 0) | |
ea403d8b | 514 | (match_operand 1)) |
43cacb12 RS |
515 | (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] |
516 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
517 | { | |
518 | /* Create a PTRUE by setting every bit of the VNx16BI scratch predicate. */ | |
519 | emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
520 | ||
521 | /* Refer to the PTRUE in the appropriate mode for this move; the predicate mode is chosen from the unit size of operand 0's mode. */ | |
522 | machine_mode mode = GET_MODE (operands[0]); | |
523 | machine_mode pred_mode | |
524 | = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require (); | |
525 | rtx pred = gen_lowpart (pred_mode, operands[2]); | |
526 | ||
527 | /* Emit a predicated load or store. */ | |
528 | aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); | |
529 | DONE; | |
530 | } | |
531 | ) | |
532 | ||
915d28fe RS |
533 | ;; A predicated move in which the predicate is known to be all-true. |
534 | ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move, | |
535 | ;; so changes to this pattern will need changes there as well. The alternatives are, in order: register-to-register (output as "#" and split into a plain move), LD1 load and ST1 store. | |
0c63a8ee | 536 | (define_insn_and_split "@aarch64_pred_mov<mode>" |
9c6b4601 | 537 | [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m") |
43cacb12 | 538 | (unspec:SVE_ALL |
9c6b4601 RS |
539 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
540 | (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")] | |
06308276 | 541 | UNSPEC_PRED_X))] |
43cacb12 RS |
542 | "TARGET_SVE |
543 | && (register_operand (operands[0], <MODE>mode) | |
544 | || register_operand (operands[2], <MODE>mode))" | |
545 | "@ | |
9c6b4601 | 546 | # |
43cacb12 RS |
547 | ld1<Vesize>\t%0.<Vetype>, %1/z, %2 |
548 | st1<Vesize>\t%2.<Vetype>, %1, %0" | |
9c6b4601 RS |
549 | "&& register_operand (operands[0], <MODE>mode) |
550 | && register_operand (operands[2], <MODE>mode)" | |
551 | [(set (match_dup 0) (match_dup 2))] | |
43cacb12 RS |
552 | ) |
553 | ||
915d28fe RS |
554 | ;; A pattern for optimizing SUBREGs that have a reinterpreting effect |
555 | ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move | |
556 | ;; for details. We use a special predicate for operand 2 to reduce | |
557 | ;; the number of patterns. The insn has no assembly of its own: it is output as "#" and split after reload by aarch64_split_sve_subreg_move. | |
558 | (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be" | |
559 | [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") | |
43cacb12 | 560 | (unspec:SVE_ALL |
915d28fe RS |
561 | [(match_operand:VNx16BI 1 "register_operand" "Upl") |
562 | (match_operand 2 "aarch64_any_register_operand" "w")] | |
563 | UNSPEC_REV_SUBREG))] | |
564 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
565 | "#" | |
566 | "&& reload_completed" | |
567 | [(const_int 0)] | |
f307441a | 568 | { |
915d28fe RS |
569 | aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); |
570 | DONE; | |
f307441a RS |
571 | } |
572 | ) | |
573 | ||
4aeb1ba7 RS |
574 | ;; Reinterpret operand 1 in operand 0's mode, without changing its contents. |
575 | ;; This is equivalent to a subreg on little-endian targets but not for | |
576 | ;; big-endian; see "Note on the handling of big-endian SVE" at the head of the file for details. On little-endian targets the expander therefore emits a lowpart move; on big-endian targets it falls through and emits the UNSPEC_REINTERPRET pattern. | |
577 | (define_expand "@aarch64_sve_reinterpret<mode>" | |
578 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
579 | (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand")] | |
580 | UNSPEC_REINTERPRET))] | |
581 | "TARGET_SVE" | |
582 | { | |
583 | if (!BYTES_BIG_ENDIAN) | |
584 | { | |
585 | emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1])); | |
586 | DONE; | |
587 | } | |
588 | } | |
589 | ) | |
590 | ||
591 | ;; A pattern for handling type punning on big-endian targets. We use a | |
592 | ;; special predicate for operand 1 to reduce the number of patterns. Operand 1 is tied to operand 0 (constraint "0"), so once reload has completed the insn is split into nothing but a NOTE_INSN_DELETED note. | |
593 | (define_insn_and_split "*aarch64_sve_reinterpret<mode>" | |
594 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
595 | (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand" "0")] | |
596 | UNSPEC_REINTERPRET))] | |
597 | "TARGET_SVE" | |
598 | "#" | |
599 | "&& reload_completed" | |
600 | [(set (match_dup 0) (match_dup 1))] | |
601 | { | |
602 | emit_note (NOTE_INSN_DELETED); | |
603 | DONE; | |
604 | } | |
605 | ) | |
606 | ||
915d28fe RS |
607 | ;; ------------------------------------------------------------------------- |
608 | ;; ---- Moves of multiple vectors | |
609 | ;; ------------------------------------------------------------------------- | |
610 | ;; All patterns in this section are synthetic and split to real | |
611 | ;; instructions after reload. | |
612 | ;; ------------------------------------------------------------------------- | |
f307441a | 613 | |
9f4cbab8 RS |
614 | (define_expand "mov<mode>" |
615 | [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") | |
616 | (match_operand:SVE_STRUCT 1 "general_operand"))] | |
617 | "TARGET_SVE" | |
618 | { | |
619 | /* Big-endian loads and stores need to be done via LD1 and ST1; | |
620 | see "Note on the handling of big-endian SVE" at the head of the file for details. */ | |
621 | if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
622 | && BYTES_BIG_ENDIAN) | |
623 | { | |
624 | gcc_assert (can_create_pseudo_p ()); /* This path is only expected while new pseudos can still be created. */ | |
625 | aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
626 | DONE; | |
627 | } | |
628 | ||
629 | if (CONSTANT_P (operands[1])) /* Constant sources are handed to aarch64_expand_mov_immediate. */ | |
630 | { | |
631 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
632 | DONE; | |
633 | } | |
634 | } | |
635 | ) | |
636 | ||
637 | ;; Unpredicated structure moves (little-endian). Synthetic: the insn is output as "#" and is split into real instructions after reload; its length attribute comes from <insn_length>. | |
638 | (define_insn "*aarch64_sve_mov<mode>_le" | |
639 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
640 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
641 | "TARGET_SVE && !BYTES_BIG_ENDIAN" | |
642 | "#" | |
643 | [(set_attr "length" "<insn_length>")] | |
644 | ) | |
645 | ||
646 | ;; Unpredicated structure moves (big-endian). Memory accesses require | |
647 | ;; secondary reloads, so only a register destination and a non-memory source are accepted. Synthetic: the insn is output as "#" and is split into real instructions after reload. | |
915d28fe | 648 | (define_insn "*aarch64_sve_mov<mode>_be" |
9f4cbab8 RS |
649 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") |
650 | (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
651 | "TARGET_SVE && BYTES_BIG_ENDIAN" | |
652 | "#" | |
653 | [(set_attr "length" "<insn_length>")] | |
654 | ) | |
655 | ||
656 | ;; Split unpredicated structure moves into pieces. This is the same | |
657 | ;; for both big-endian and little-endian code, although it only needs | |
658 | ;; to handle memory operands for little-endian code. | |
;; Only applies once reload_completed is set.  A register-to-register move
;; goes through aarch64_simd_emit_reg_reg_move with <vector_count> pieces of
;; <VSINGLE>mode.  Every other operand combination becomes <vector_count>
;; separate <VSINGLE>mode SETs, with piece I taken from byte offset
;; I * BYTES_PER_SVE_VECTOR of both the destination and the source.
659 | (define_split | |
660 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") | |
661 | (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] | |
662 | "TARGET_SVE && reload_completed" | |
663 | [(const_int 0)] | |
664 | { | |
665 | rtx dest = operands[0]; | |
666 | rtx src = operands[1]; | |
667 | if (REG_P (dest) && REG_P (src)) | |
668 | aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); | |
669 | else | |
670 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
671 | { | |
672 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, | |
673 | i * BYTES_PER_SVE_VECTOR); | |
674 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, | |
675 | i * BYTES_PER_SVE_VECTOR); | |
676 | emit_insn (gen_rtx_SET (subdest, subsrc)); | |
677 | } | |
678 | DONE; | |
679 | } | |
680 | ) | |
681 | ||
682 | ;; Predicated structure moves. This works for both endiannesses but in | |
683 | ;; practice is only useful for big-endian. | |
;; Operand 1 is the governing predicate and operand 2 the value being moved.
;; The insn condition requires operand 0 or operand 2 to be a register, so a
;; memory-to-memory move never matches.  After reload the insn is split into
;; <vector_count> calls to aarch64_emit_sve_pred_move, one per <VSINGLE>mode
;; piece at byte offset I * BYTES_PER_SVE_VECTOR, all using operand 1 as the
;; predicate.
0c63a8ee | 684 | (define_insn_and_split "@aarch64_pred_mov<mode>" |
9c6b4601 | 685 | [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx") |
9f4cbab8 | 686 | (unspec:SVE_STRUCT |
9c6b4601 RS |
687 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
688 | (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")] | |
06308276 | 689 | UNSPEC_PRED_X))] |
9f4cbab8 RS |
690 | "TARGET_SVE |
691 | && (register_operand (operands[0], <MODE>mode) | |
692 | || register_operand (operands[2], <MODE>mode))" | |
693 | "#" | |
694 | "&& reload_completed" | |
695 | [(const_int 0)] | |
696 | { | |
697 | for (unsigned int i = 0; i < <vector_count>; ++i) | |
698 | { | |
699 | rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], | |
700 | <MODE>mode, | |
701 | i * BYTES_PER_SVE_VECTOR); | |
702 | rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], | |
703 | <MODE>mode, | |
704 | i * BYTES_PER_SVE_VECTOR); | |
705 | aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); | |
706 | } | |
707 | DONE; | |
708 | } | |
709 | [(set_attr "length" "<insn_length>")] | |
710 | ) | |
711 | ||
915d28fe RS |
712 | ;; ------------------------------------------------------------------------- |
713 | ;; ---- Moves of predicates | |
714 | ;; ------------------------------------------------------------------------- | |
715 | ;; Includes: | |
716 | ;; - MOV | |
717 | ;; - LDR | |
718 | ;; - PFALSE | |
719 | ;; - PTRUE | |
720 | ;; - STR | |
721 | ;; ------------------------------------------------------------------------- | |
722 | ||
43cacb12 RS |
;; Expander for predicate moves.  A store (MEM destination) first forces the
;; source into a register.  A source that is still CONSTANT_P after that is
;; handed to aarch64_expand_mov_immediate; anything else reaches the end of
;; the C block without DONE, so the (set ...) pattern itself is emitted.
;; The MEM test uses MEM_P, matching the structure-move expander.
723 | (define_expand "mov<mode>" |
724 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") | |
725 | (match_operand:PRED_ALL 1 "general_operand"))] | |
726 | "TARGET_SVE" | |
727 | { | |
728 | if (MEM_P (operands[0])) | |
729 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
0b1fe8cf RS |
730 |
731 | if (CONSTANT_P (operands[1])) | |
732 | { | |
733 | aarch64_expand_mov_immediate (operands[0], operands[1]); | |
734 | DONE; | |
735 | } | |
43cacb12 RS |
736 | } |
737 | ) | |
738 | ||
;; Predicate move instruction.  The insn condition requires at least one of
;; the two operands to be a register.  Alternatives, in order: register copy
;; (MOV), store to memory (STR), load from memory (LDR), and a Dn constant
;; whose assembly text comes from aarch64_output_sve_mov_immediate.
739 | (define_insn "*aarch64_sve_mov<mode>" | |
1044fa32 | 740 | [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa") |
0b1fe8cf | 741 | (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))] |
43cacb12 RS |
742 | "TARGET_SVE |
743 | && (register_operand (operands[0], <MODE>mode) | |
744 | || register_operand (operands[1], <MODE>mode))" | |
745 | "@ | |
746 | mov\t%0.b, %1.b | |
747 | str\t%1, %0 | |
748 | ldr\t%0, %1 | |
1044fa32 | 749 | * return aarch64_output_sve_mov_immediate (operands[1]);" |
43cacb12 RS |
750 | ) |
751 | ||
915d28fe RS |
752 | ;; ========================================================================= |
753 | ;; == Loads | |
754 | ;; ========================================================================= | |
755 | ||
756 | ;; ------------------------------------------------------------------------- | |
757 | ;; ---- Normal contiguous loads | |
758 | ;; ------------------------------------------------------------------------- | |
759 | ;; Includes contiguous forms of: | |
760 | ;; - LD1B | |
761 | ;; - LD1D | |
762 | ;; - LD1H | |
763 | ;; - LD1W | |
764 | ;; - LD2B | |
765 | ;; - LD2D | |
766 | ;; - LD2H | |
767 | ;; - LD2W | |
768 | ;; - LD3B | |
769 | ;; - LD3D | |
770 | ;; - LD3H | |
771 | ;; - LD3W | |
772 | ;; - LD4B | |
773 | ;; - LD4D | |
774 | ;; - LD4H | |
775 | ;; - LD4W | |
776 | ;; ------------------------------------------------------------------------- | |
777 | ||
778 | ;; Predicated LD1. | |
;; Operand 0 is the destination vector, operand 1 the memory source and
;; operand 2 the governing predicate, which the template prints in its
;; "/z" form.
779 | (define_insn "maskload<mode><vpred>" | |
780 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
781 | (unspec:SVE_ALL | |
782 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
783 | (match_operand:SVE_ALL 1 "memory_operand" "m")] | |
784 | UNSPEC_LD1_SVE))] | |
43cacb12 | 785 | "TARGET_SVE" |
915d28fe | 786 | "ld1<Vesize>\t%0.<Vetype>, %2/z, %1" |
43cacb12 RS |
787 | ) |
788 | ||
915d28fe RS |
789 | ;; Unpredicated LD[234]. |
;; Builds the same UNSPEC_LDN form as vec_mask_load_lanes<mode><vsingle>,
;; with operand 2 filled in by aarch64_ptrue_reg (<VPRED>mode) instead of
;; being supplied by the caller.
790 | (define_expand "vec_load_lanes<mode><vsingle>" | |
791 | [(set (match_operand:SVE_STRUCT 0 "register_operand") | |
792 | (unspec:SVE_STRUCT | |
793 | [(match_dup 2) | |
794 | (match_operand:SVE_STRUCT 1 "memory_operand")] | |
795 | UNSPEC_LDN))] | |
43cacb12 RS |
796 | "TARGET_SVE" |
797 | { | |
915d28fe | 798 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
799 | } |
800 | ) | |
801 | ||
915d28fe RS |
802 | ;; Predicated LD[234]. |
;; Operand 0 is the destination structure, operand 1 the memory source and
;; operand 2 the governing predicate (printed in its "/z" form).  The
;; mnemonic is assembled from "ld", <vector_count> and <Vesize>.
803 | (define_insn "vec_mask_load_lanes<mode><vsingle>" | |
804 | [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") | |
805 | (unspec:SVE_STRUCT | |
806 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
807 | (match_operand:SVE_STRUCT 1 "memory_operand" "m")] | |
808 | UNSPEC_LDN))] | |
8711e791 | 809 | "TARGET_SVE" |
915d28fe | 810 | "ld<vector_count><Vesize>\t%0, %2/z, %1" |
8711e791 RS |
811 | ) |
812 | ||
915d28fe RS |
813 | ;; ------------------------------------------------------------------------- |
814 | ;; ---- Normal gather loads | |
815 | ;; ------------------------------------------------------------------------- | |
816 | ;; Includes gather forms of: | |
817 | ;; - LD1D | |
818 | ;; - LD1W | |
819 | ;; ------------------------------------------------------------------------- | |
820 | ||
821 | ;; Unpredicated gather loads. | |
;; Operands 0-4 have the same roles as in the mask_gather_load<mode>
;; patterns: destination, scalar base (register or zero), vector of
;; offsets, extension flag and scale.  Operand 5, the governing predicate,
;; is not supplied by the caller; it is set to
;; aarch64_ptrue_reg (<VPRED>mode).
822 | (define_expand "gather_load<mode>" | |
823 | [(set (match_operand:SVE_SD 0 "register_operand") | |
824 | (unspec:SVE_SD | |
825 | [(match_dup 5) | |
826 | (match_operand:DI 1 "aarch64_reg_or_zero") | |
827 | (match_operand:<V_INT_EQUIV> 2 "register_operand") | |
828 | (match_operand:DI 3 "const_int_operand") | |
829 | (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>") | |
830 | (mem:BLK (scratch))] | |
831 | UNSPEC_LD1_GATHER))] | |
832 | "TARGET_SVE" | |
43cacb12 | 833 | { |
915d28fe | 834 | operands[5] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 | 835 | } |
43cacb12 RS |
836 | ) |
837 | ||
915d28fe RS |
838 | ;; Predicated gather loads for 32-bit elements. Operand 3 is true for |
839 | ;; unsigned extension and false for signed extension. | |
;; Operand 5 is the governing predicate, operand 1 the scalar base,
;; operand 2 the vector of offsets and operand 4 the scale.
;; Alternatives, in order:
;;   1. zero base (Z): operand 2 alone forms the address, any operand 3;
;;   2. register base, operand 3 zero, scale matching Ui1: SXTW offsets;
;;   3. register base, operand 3 matching Ui1, scale matching Ui1: UXTW;
;;   4. as 2, but any accepted scale, printed through %p4;
;;   5. as 3, but any accepted scale, printed through %p4.
;; NOTE(review): Ui1 is presumably the constant 1 -- confirm in
;; constraints.md.
840 | (define_insn "mask_gather_load<mode>" | |
841 | [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") | |
842 | (unspec:SVE_S | |
843 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
844 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
845 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w") | |
846 | (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
847 | (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
848 | (mem:BLK (scratch))] | |
849 | UNSPEC_LD1_GATHER))] | |
850 | "TARGET_SVE" | |
851 | "@ | |
852 | ld1w\t%0.s, %5/z, [%2.s] | |
853 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] | |
854 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] | |
855 | ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] | |
856 | ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" | |
857 | ) | |
858 | ||
859 | ;; Predicated gather loads for 64-bit elements. The value of operand 3 | |
860 | ;; doesn't matter in this case. | |
;; Operand 5 is the governing predicate, operand 1 the scalar base,
;; operand 2 the vector of offsets and operand 4 the scale.  Operand 3 has
;; no constraint string and does not appear in any template.
;; Alternatives, in order:
;;   1. zero base (Z): operand 2 alone forms the address;
;;   2. register base with a scale matching Ui1: plain [base, offsets];
;;   3. register base with any accepted scale, printed as "lsl %p4".
861 | (define_insn "mask_gather_load<mode>" | |
862 | [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") | |
863 | (unspec:SVE_D | |
864 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
865 | (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") | |
866 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w") | |
867 | (match_operand:DI 3 "const_int_operand") | |
868 | (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
869 | (mem:BLK (scratch))] | |
870 | UNSPEC_LD1_GATHER))] | |
871 | "TARGET_SVE" | |
872 | "@ | |
873 | ld1d\t%0.d, %5/z, [%2.d] | |
874 | ld1d\t%0.d, %5/z, [%1, %2.d] | |
875 | ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" | |
876 | ) | |
877 | ||
878 | ;; ========================================================================= | |
879 | ;; == Stores | |
880 | ;; ========================================================================= | |
881 | ||
882 | ;; ------------------------------------------------------------------------- | |
883 | ;; ---- Normal contiguous stores | |
884 | ;; ------------------------------------------------------------------------- | |
885 | ;; Includes contiguous forms of: | |
886 | ;; - ST1B | |
887 | ;; - ST1D | |
888 | ;; - ST1H | |
889 | ;; - ST1W | |
890 | ;; - ST2B | |
891 | ;; - ST2D | |
892 | ;; - ST2H | |
893 | ;; - ST2W | |
894 | ;; - ST3B | |
895 | ;; - ST3D | |
896 | ;; - ST3H | |
897 | ;; - ST3W | |
898 | ;; - ST4B | |
899 | ;; - ST4D | |
900 | ;; - ST4H | |
901 | ;; - ST4W | |
902 | ;; ------------------------------------------------------------------------- | |
903 | ||
904 | ;; Predicated ST1. | |
;; Operand 0 is the memory destination, operand 1 the vector being stored
;; and operand 2 the governing predicate.  Operand 0 is marked "+m" and also
;; appears as (match_dup 0) inside the unspec, so the stored value is
;; modelled as depending on the previous contents of that memory.
905 | (define_insn "maskstore<mode><vpred>" | |
906 | [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") | |
907 | (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
908 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
909 | (match_dup 0)] | |
910 | UNSPEC_ST1_SVE))] | |
911 | "TARGET_SVE" | |
912 | "st1<Vesize>\t%1.<Vetype>, %2, %0" | |
913 | ) | |
914 | ||
915 | ;; Unpredicated ST[234]. This is always a full update, so the dependence | |
916 | ;; on the old value of the memory location (via (match_dup 0)) is redundant. | |
917 | ;; There doesn't seem to be any obvious benefit to treating the all-true | |
918 | ;; case differently though. In particular, it's very unlikely that we'll | |
919 | ;; only find out during RTL that a store_lanes is dead. | |
;; Builds the same UNSPEC_STN form as vec_mask_store_lanes<mode><vsingle>,
;; with operand 2 filled in by aarch64_ptrue_reg (<VPRED>mode) instead of
;; being supplied by the caller.
920 | (define_expand "vec_store_lanes<mode><vsingle>" | |
921 | [(set (match_operand:SVE_STRUCT 0 "memory_operand") | |
922 | (unspec:SVE_STRUCT | |
923 | [(match_dup 2) | |
924 | (match_operand:SVE_STRUCT 1 "register_operand") | |
925 | (match_dup 0)] | |
926 | UNSPEC_STN))] | |
927 | "TARGET_SVE" | |
43cacb12 | 928 | { |
915d28fe | 929 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
930 | } |
931 | ) | |
932 | ||
915d28fe RS |
933 | ;; Predicated ST[234]. |
;; Operand 0 is the memory destination ("+m", and repeated as (match_dup 0)
;; inside the unspec), operand 1 the structure being stored and operand 2
;; the governing predicate.  The mnemonic is assembled from "st",
;; <vector_count> and <Vesize>.
934 | (define_insn "vec_mask_store_lanes<mode><vsingle>" | |
935 | [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") | |
936 | (unspec:SVE_STRUCT | |
937 | [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
938 | (match_operand:SVE_STRUCT 1 "register_operand" "w") | |
939 | (match_dup 0)] | |
940 | UNSPEC_STN))] | |
941 | "TARGET_SVE" | |
942 | "st<vector_count><Vesize>\t%1, %2, %0" | |
943 | ) | |
944 | ||
945 | ;; ------------------------------------------------------------------------- | |
946 | ;; ---- Normal scatter stores | |
947 | ;; ------------------------------------------------------------------------- | |
948 | ;; Includes scatter forms of: | |
949 | ;; - ST1D | |
950 | ;; - ST1W | |
951 | ;; ------------------------------------------------------------------------- | |
952 | ||
953 | ;; Unpredicated scatter stores. | |
;; Operands 0-4 have the same roles as in the mask_scatter_store<mode>
;; patterns: scalar base (register or zero), vector of offsets, extension
;; flag, scale and the vector being stored.  Operand 5, the governing
;; predicate, is not supplied by the caller; it is set to
;; aarch64_ptrue_reg (<VPRED>mode).
954 | (define_expand "scatter_store<mode>" | |
955 | [(set (mem:BLK (scratch)) | |
956 | (unspec:BLK | |
957 | [(match_dup 5) | |
958 | (match_operand:DI 0 "aarch64_reg_or_zero") | |
959 | (match_operand:<V_INT_EQUIV> 1 "register_operand") | |
960 | (match_operand:DI 2 "const_int_operand") | |
961 | (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>") | |
962 | (match_operand:SVE_SD 4 "register_operand")] | |
963 | UNSPEC_ST1_SCATTER))] | |
964 | "TARGET_SVE" | |
43cacb12 | 965 | { |
915d28fe | 966 | operands[5] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
967 | } |
968 | ) | |
969 | ||
915d28fe RS |
970 | ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for |
971 | ;; unsigned extension and false for signed extension. | |
;; Operand 5 is the governing predicate, operand 0 the scalar base,
;; operand 1 the vector of offsets, operand 3 the scale and operand 4 the
;; vector being stored.
;; Alternatives, in order:
;;   1. zero base (Z): operand 1 alone forms the address, any operand 2;
;;   2. register base, operand 2 zero, scale matching Ui1: SXTW offsets;
;;   3. register base, operand 2 matching Ui1, scale matching Ui1: UXTW;
;;   4. as 2, but any accepted scale, printed through %p3;
;;   5. as 3, but any accepted scale, printed through %p3.
;; NOTE(review): Ui1 is presumably the constant 1 -- confirm in
;; constraints.md.
972 | (define_insn "mask_scatter_store<mode>" | |
973 | [(set (mem:BLK (scratch)) | |
974 | (unspec:BLK | |
975 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
976 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
977 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w") | |
978 | (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
979 | (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
980 | (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] | |
981 | UNSPEC_ST1_SCATTER))] | |
43cacb12 RS |
982 | "TARGET_SVE" |
983 | "@ | |
915d28fe RS |
984 | st1w\t%4.s, %5, [%1.s] |
985 | st1w\t%4.s, %5, [%0, %1.s, sxtw] | |
986 | st1w\t%4.s, %5, [%0, %1.s, uxtw] | |
987 | st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] | |
988 | st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" | |
989 | ) | |
990 | ||
991 | ;; Predicated scatter stores for 64-bit elements. The value of operand 2 | |
992 | ;; doesn't matter in this case. | |
;; Operand 5 is the governing predicate, operand 0 the scalar base,
;; operand 1 the vector of offsets, operand 3 the scale and operand 4 the
;; vector being stored.  Operand 2 has no constraint string and does not
;; appear in any template.
;; Alternatives, in order:
;;   1. zero base (Z): operand 1 alone forms the address;
;;   2. register base with a scale matching Ui1: plain [base, offsets];
;;   3. register base with any accepted scale, printed as "lsl %p3".
993 | (define_insn "mask_scatter_store<mode>" | |
994 | [(set (mem:BLK (scratch)) | |
995 | (unspec:BLK | |
996 | [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
997 | (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") | |
998 | (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w") | |
999 | (match_operand:DI 2 "const_int_operand") | |
1000 | (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
1001 | (match_operand:SVE_D 4 "register_operand" "w, w, w")] | |
1002 | UNSPEC_ST1_SCATTER))] | |
1003 | "TARGET_SVE" | |
1004 | "@ | |
1005 | st1d\t%4.d, %5, [%1.d] | |
1006 | st1d\t%4.d, %5, [%0, %1.d] | |
1007 | st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" | |
43cacb12 RS |
1008 | ) |
1009 | ||
915d28fe RS |
1010 | ;; ========================================================================= |
1011 | ;; == Vector creation | |
1012 | ;; ========================================================================= | |
1013 | ||
1014 | ;; ------------------------------------------------------------------------- | |
1015 | ;; ---- [INT,FP] Duplicate element | |
1016 | ;; ------------------------------------------------------------------------- | |
1017 | ;; Includes: | |
1018 | ;; - MOV | |
1019 | ;; - LD1RB | |
1020 | ;; - LD1RD | |
1021 | ;; - LD1RH | |
1022 | ;; - LD1RW | |
1023 | ;; - LD1RQB | |
1024 | ;; - LD1RQD | |
1025 | ;; - LD1RQH | |
1026 | ;; - LD1RQW | |
1027 | ;; ------------------------------------------------------------------------- | |
1028 | ||
43cacb12 RS |
;; Expander for broadcasting scalar operand 1 into every element of vector
;; operand 0.  A MEM source is emitted directly as gen_sve_ld1r<mode>, using
;; a predicate from aarch64_ptrue_reg (<VPRED>mode) and CONST0_RTX
;; (<MODE>mode) as the final argument.  Any other source reaches the end of
;; the C block without DONE, so the parallel (the vec_duplicate SET plus a
;; VNx16BI scratch clobber) is emitted.
1029 | (define_expand "vec_duplicate<mode>" |
1030 | [(parallel | |
1031 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
1032 | (vec_duplicate:SVE_ALL | |
1033 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand"))) | |
678faefc | 1034 | (clobber (scratch:VNx16BI))])] |
43cacb12 RS |
1035 | "TARGET_SVE" |
1036 | { | |
1037 | if (MEM_P (operands[1])) | |
1038 | { | |
16de3637 | 1039 | rtx ptrue = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
1040 | emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1], |
1041 | CONST0_RTX (<MODE>mode))); | |
1042 | DONE; | |
1043 | } | |
1044 | } | |
1045 | ) | |
1046 | ||
1047 | ;; Accept memory operands for the benefit of combine, and also in case | |
1048 | ;; the scalar input gets spilled to memory during RA. We want to split | |
1049 | ;; the load at the first opportunity in order to allow the PTRUE to be | |
1050 | ;; optimized with surrounding code. | |
;; Alternatives, in order: duplicate from a general register (r), from a
;; vector register (w), and from memory (Uty).  Only the memory alternative
;; asks for a real predicate register (Upl) for the VNx16BI scratch; the
;; other two use X.  The split fires whenever operand 1 is a MEM: operand 2
;; (replaced by a fresh pseudo if it is still a SCRATCH) is set to
;; CONSTM1_RTX (VNx16BImode), viewed in <VPRED>mode with gen_lowpart, and
;; used as the predicate of gen_sve_ld1r<mode>.  The length attribute is
;; 4, 4 and 8 for the three alternatives respectively.
1051 | (define_insn_and_split "*vec_duplicate<mode>_reg" | |
1052 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") | |
1053 | (vec_duplicate:SVE_ALL | |
1054 | (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty"))) | |
678faefc | 1055 | (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))] |
43cacb12 RS |
1056 | "TARGET_SVE" |
1057 | "@ | |
1058 | mov\t%0.<Vetype>, %<vwcore>1 | |
1059 | mov\t%0.<Vetype>, %<Vetype>1 | |
1060 | #" | |
1061 | "&& MEM_P (operands[1])" | |
1062 | [(const_int 0)] | |
1063 | { | |
1064 | if (GET_CODE (operands[2]) == SCRATCH) | |
678faefc RS |
1065 | operands[2] = gen_reg_rtx (VNx16BImode); |
1066 | emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
1067 | rtx gp = gen_lowpart (<VPRED>mode, operands[2]); | |
1068 | emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1], | |
43cacb12 RS |
1069 | CONST0_RTX (<MODE>mode))); |
1070 | DONE; | |
1071 | } | |
1072 | [(set_attr "length" "4,4,8")] | |
1073 | ) | |
1074 | ||
4aeb1ba7 RS |
1075 | ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version). |
;; Only matches when !BYTES_BIG_ENDIAN.  Before printing, operand 1 is
;; re-created as a <MODE>mode REG with the same register number as the
;; <V128> input, and the template then duplicates quadword element 0 of it.
;; NOTE(review): the mode change is presumably so that %1 prints as an SVE
;; register name -- confirm against the operand printer.
1076 | (define_insn "@aarch64_vec_duplicate_vq<mode>_le" | |
1077 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
1078 | (vec_duplicate:SVE_ALL | |
1079 | (match_operand:<V128> 1 "register_operand" "w")))] | |
1080 | "TARGET_SVE && !BYTES_BIG_ENDIAN" | |
1081 | { | |
1082 | operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); | |
1083 | return "dup\t%0.q, %1.q[0]"; | |
1084 | } | |
1085 | ) | |
1086 | ||
1087 | ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version). | |
1088 | ;; The SVE register layout puts memory lane N into (architectural) | |
1089 | ;; register lane N, whereas the Advanced SIMD layout puts the memory | |
1090 | ;; lsb into the register lsb. We therefore have to describe this in rtl | |
1091 | ;; terms as a reverse of the V128 vector followed by a duplicate. | |
;; Only matches when BYTES_BIG_ENDIAN.  The insn condition additionally
;; requires the first index in operand 2 to equal
;; GET_MODE_NUNITS (<V128>mode) - 1, i.e. the last <V128> element.
;; NOTE(review): combined with the descending_int_parallel predicate this
;; presumably pins operand 2 to a full reversal -- confirm against the
;; predicate's definition.
;; The emitted instruction is the same DUP as in the LE pattern, with
;; operand 1 re-created as a <MODE>mode REG before printing.
1092 | (define_insn "@aarch64_vec_duplicate_vq<mode>_be" | |
1093 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
1094 | (vec_duplicate:SVE_ALL | |
1095 | (vec_select:<V128> | |
1096 | (match_operand:<V128> 1 "register_operand" "w") | |
1097 | (match_operand 2 "descending_int_parallel"))))] | |
1098 | "TARGET_SVE | |
1099 | && BYTES_BIG_ENDIAN | |
1100 | && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)), | |
1101 | GET_MODE_NUNITS (<V128>mode) - 1)" | |
1102 | { | |
1103 | operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); | |
1104 | return "dup\t%0.q, %1.q[0]"; | |
1105 | } | |
1106 | ) | |
1107 | ||
43cacb12 RS |
1108 | ;; This is used for vec_duplicate<mode>s from memory, but can also |
1109 | ;; be used by combine to optimize selects of a vec_duplicate<mode> | |
1110 | ;; with zero. | |
;; Operand 1 is the governing predicate, operand 2 the scalar memory source
;; and operand 3 a vector that must satisfy aarch64_simd_imm_zero.  The RTL
;; is an UNSPEC_SEL of the duplicated scalar and that zero vector; the
;; template prints the predicate in its "/z" form.
1111 | (define_insn "sve_ld1r<mode>" | |
1112 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
1113 | (unspec:SVE_ALL | |
1114 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1115 | (vec_duplicate:SVE_ALL | |
1116 | (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty")) | |
1117 | (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] | |
1118 | UNSPEC_SEL))] | |
1119 | "TARGET_SVE" | |
1120 | "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2" | |
1121 | ) | |
1122 | ||
4aeb1ba7 RS |
1123 | ;; Load 128 bits from memory under predicate control and duplicate to |
1124 | ;; fill a vector. | |
;; Operand 2 is the governing predicate and operand 1 the <V128> memory
;; source.  Before printing, operand 1 is replaced by a <VEL>mode MEM at the
;; same address.
;; NOTE(review): the element-mode MEM is presumably needed so that the
;; address prints in the form LD1RQ expects -- confirm against the address
;; printer.
1125 | (define_insn "@aarch64_sve_ld1rq<mode>" | |
947b1372 RS |
1126 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
1127 | (unspec:SVE_ALL | |
4aeb1ba7 RS |
1128 | [(match_operand:<VPRED> 2 "register_operand" "Upl") |
1129 | (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")] | |
43cacb12 RS |
1130 | UNSPEC_LD1RQ))] |
1131 | "TARGET_SVE" | |
4aeb1ba7 RS |
1132 | { |
1133 | operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0)); | |
1134 | return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1"; | |
1135 | } | |
43cacb12 RS |
1136 | ) |
1137 | ||
915d28fe RS |
1138 | ;; ------------------------------------------------------------------------- |
1139 | ;; ---- [INT,FP] Initialize from individual elements | |
1140 | ;; ------------------------------------------------------------------------- | |
1141 | ;; Includes: | |
1142 | ;; - INSR | |
1143 | ;; ------------------------------------------------------------------------- | |
1144 | ||
;; Vector initialisation expander.  The RTL template only declares the
;; operands; all of the work is delegated to
;; aarch64_sve_expand_vector_init (operands[0], operands[1]).
1145 | (define_expand "vec_init<mode><Vel>" | |
1146 | [(match_operand:SVE_ALL 0 "register_operand") | |
1147 | (match_operand 1 "")] | |
43cacb12 RS |
1148 | "TARGET_SVE" |
1149 | { | |
915d28fe | 1150 | aarch64_sve_expand_vector_init (operands[0], operands[1]); |
43cacb12 RS |
1151 | DONE; |
1152 | } | |
1153 | ) | |
1154 | ||
915d28fe RS |
1155 | ;; Shift an SVE vector left and insert a scalar into element 0. |
;; Operand 1 is the input vector and operand 2 the scalar to insert.
;; Alternatives 1 and 2 tie operand 1 to the destination and emit a single
;; INSR, taking the scalar from a general register or zero (rZ) or from a
;; vector register (w).  Alternatives 3 and 4 allow a different source
;; register, use an earlyclobber destination and emit MOVPRFX before the
;; INSR; they are flagged through the "movprfx" attribute.
1156 | (define_insn "vec_shl_insert_<mode>" | |
61ee25b9 | 1157 | [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??&w, ?&w") |
915d28fe | 1158 | (unspec:SVE_ALL |
61ee25b9 RS |
1159 | [(match_operand:SVE_ALL 1 "register_operand" "0, 0, w, w") |
1160 | (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")] | |
915d28fe RS |
1161 | UNSPEC_INSR))] |
1162 | "TARGET_SVE" | |
1163 | "@ | |
1164 | insr\t%0.<Vetype>, %<vwcore>2 | |
61ee25b9 RS |
1165 | insr\t%0.<Vetype>, %<Vetype>2 |
1166 | movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2 | |
1167 | movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2" | |
1168 | [(set_attr "movprfx" "*,*,yes,yes")] | |
915d28fe RS |
1169 | ) |
1170 | ||
1171 | ;; ------------------------------------------------------------------------- | |
1172 | ;; ---- [INT] Linear series | |
1173 | ;; ------------------------------------------------------------------------- | |
1174 | ;; Includes: | |
1175 | ;; - INDEX | |
1176 | ;; ------------------------------------------------------------------------- | |
1177 | ||
;; Operand 1 is the first INDEX argument and operand 2 the second, matching
;; the order in which they appear in (vec_series ...).  Alternatives, in
;; order: Usi immediate then register, register then Usi immediate, and two
;; registers.  There is no alternative with two immediates.
1178 | (define_insn "vec_series<mode>" | |
1179 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") | |
1180 | (vec_series:SVE_I | |
1181 | (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r") | |
1182 | (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))] | |
1183 | "TARGET_SVE" | |
1184 | "@ | |
1185 | index\t%0.<Vetype>, #%1, %<vw>2 | |
43cacb12 RS |
1186 | index\t%0.<Vetype>, %<vw>1, #%2 |
1187 | index\t%0.<Vetype>, %<vw>1, %<vw>2" | |
1188 | ) | |
1189 | ||
1190 | ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range | |
1191 | ;; of an INDEX instruction. | |
;; Operand 1 is the scalar being duplicated and operand 2 the constant
;; vector added to it.  The insn only matches when
;; aarch64_check_zero_based_sve_index_immediate accepts operand 2; at output
;; time the same function's return value replaces operand 2 and is printed
;; as the immediate second argument of INDEX.
1192 | (define_insn "*vec_series<mode>_plus" | |
1193 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1194 | (plus:SVE_I | |
1195 | (vec_duplicate:SVE_I | |
1196 | (match_operand:<VEL> 1 "register_operand" "r")) | |
1197 | (match_operand:SVE_I 2 "immediate_operand")))] | |
1198 | "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" | |
1199 | { | |
1200 | operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); | |
1201 | return "index\t%0.<Vetype>, %<vw>1, #%2"; | |
1202 | } | |
1203 | ) | |
1204 | ||
915d28fe RS |
1205 | ;; ------------------------------------------------------------------------- |
1206 | ;; ---- [PRED] Duplicate element | |
1207 | ;; ------------------------------------------------------------------------- | |
1208 | ;; The patterns in this section are synthetic. | |
1209 | ;; ------------------------------------------------------------------------- | |
1210 | ||
1211 | ;; Implement a predicate broadcast by shifting the low bit of the scalar | |
1212 | ;; input into the top bit and using a WHILELO. An alternative would be to | |
1213 | ;; duplicate the input and do a compare with zero. | |
;; Expansion steps: take operand 1 as a DImode value with gen_lowpart,
;; shift it left by 63 into a fresh DImode pseudo, then emit
;; gen_while_ultdi<mode> (operands[0], const0_rtx, tmp) to set the predicate.
1214 | (define_expand "vec_duplicate<mode>" | |
1215 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
1216 | (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] | |
9f4cbab8 RS |
1217 | "TARGET_SVE" |
1218 | { | |
915d28fe RS |
1219 | rtx tmp = gen_reg_rtx (DImode); |
1220 | rtx op1 = gen_lowpart (DImode, operands[1]); | |
1221 | emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); | |
1222 | emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); | |
1223 | DONE; | |
9f4cbab8 RS |
1224 | } |
1225 | ) | |
1226 | ||
915d28fe RS |
1227 | ;; ========================================================================= |
1228 | ;; == Vector decomposition | |
1229 | ;; ========================================================================= | |
9f4cbab8 | 1230 | |
915d28fe RS |
1231 | ;; ------------------------------------------------------------------------- |
1232 | ;; ---- [INT,FP] Extract index | |
1233 | ;; ------------------------------------------------------------------------- | |
1234 | ;; Includes: | |
1235 | ;; - DUP (Advanced SIMD) | |
1236 | ;; - DUP (SVE) | |
1237 | ;; - EXT (SVE) | |
1238 | ;; - ST1 (Advanced SIMD) | |
1239 | ;; - UMOV (Advanced SIMD) | |
1240 | ;; ------------------------------------------------------------------------- | |
1241 | ||
;; Expander for extracting element operand 2 of vector operand 1 into scalar
;; operand 0.  Three cases:
;;   - operand 2 is known to equal GET_MODE_NUNITS (<MODE>mode) - 1: emit
;;     gen_extract_last_<mode> with a predicate from aarch64_pfalse_reg;
;;   - operand 2 is not a CONST_INT: build a vec_series from the index with
;;     step -1, compare it with zero to get a predicate, and emit
;;     gen_extract_last_<mode> with that predicate;
;;   - any other CONST_INT index: reach the end of the C block without DONE,
;;     so the vec_select pattern itself is emitted.
1242 | (define_expand "vec_extract<mode><Vel>" | |
1243 | [(set (match_operand:<VEL> 0 "register_operand") | |
1244 | (vec_select:<VEL> | |
1245 | (match_operand:SVE_ALL 1 "register_operand") | |
1246 | (parallel [(match_operand:SI 2 "nonmemory_operand")])))] | |
9f4cbab8 RS |
1247 | "TARGET_SVE" |
1248 | { | |
915d28fe RS |
1249 | poly_int64 val; |
1250 | if (poly_int_rtx_p (operands[2], &val) | |
1251 | && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)) | |
1252 | { | |
1253 | /* The last element can be extracted with a LASTB and a false | |
1254 | predicate. */ | |
1255 | rtx sel = aarch64_pfalse_reg (<VPRED>mode); | |
1256 | emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); | |
1257 | DONE; | |
1258 | } | |
1259 | if (!CONST_INT_P (operands[2])) | |
1260 | { | |
1261 | /* Create an index with operand[2] as the base and -1 as the step. | |
1262 | It will then be zero for the element we care about. */ | |
1263 | rtx index = gen_lowpart (<VEL_INT>mode, operands[2]); | |
1264 | index = force_reg (<VEL_INT>mode, index); | |
1265 | rtx series = gen_reg_rtx (<V_INT_EQUIV>mode); | |
1266 | emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx)); | |
1267 | ||
1268 | /* Get a predicate that is true for only that element. */ | |
1269 | rtx zero = CONST0_RTX (<V_INT_EQUIV>mode); | |
1270 | rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero); | |
1271 | rtx sel = gen_reg_rtx (<VPRED>mode); | |
1272 | emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero)); | |
1273 | ||
1274 | /* Select the element using LASTB. */ | |
1275 | emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); | |
1276 | DONE; | |
1277 | } | |
9f4cbab8 RS |
1278 | } |
1279 | ) | |
1280 | ||
915d28fe RS |
1281 | ;; Extract element zero. This is a special case because we want to force |
1282 | ;; the registers to be the same for the second alternative, and then | |
1283 | ;; split the instruction into nothing after RA. | |
;; Alternatives, in order: UMOV into a general register; a vector-register
;; destination tied to the source, printed as "#"; and ST1 of lane 0 to
;; memory.  Before printing, operand 1 is re-created as a <V128>mode REG
;; with the same register number.  The split applies after reload when
;; operand 0 is a register with the same REGNO as operand 1, and replaces
;; the insn with a NOTE_INSN_DELETED note.
1284 | (define_insn_and_split "*vec_extract<mode><Vel>_0" | |
1285 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
1286 | (vec_select:<VEL> | |
1287 | (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") | |
1288 | (parallel [(const_int 0)])))] | |
9f4cbab8 | 1289 | "TARGET_SVE" |
915d28fe RS |
1290 | { |
1291 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); | |
1292 | switch (which_alternative) | |
1293 | { | |
1294 | case 0: | |
1295 | return "umov\\t%<vwcore>0, %1.<Vetype>[0]"; | |
1296 | case 1: | |
1297 | return "#"; | |
1298 | case 2: | |
1299 | return "st1\\t{%1.<Vetype>}[0], %0"; | |
1300 | default: | |
1301 | gcc_unreachable (); | |
1302 | } | |
1303 | } | |
1304 | "&& reload_completed | |
1305 | && REG_P (operands[0]) | |
1306 | && REGNO (operands[0]) == REGNO (operands[1])" | |
1307 | [(const_int 0)] | |
1308 | { | |
1309 | emit_note (NOTE_INSN_DELETED); | |
1310 | DONE; | |
1311 | } | |
1312 | [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] | |
9f4cbab8 RS |
1313 | ) |
1314 | ||
915d28fe RS |
1315 | ;; Extract an element from the Advanced SIMD portion of the register. |
1316 | ;; We don't just reuse the aarch64-simd.md pattern because we don't | |
1317 | ;; want any change in lane number on big-endian targets. | |
;; Matches when the byte offset of the element (index times
;; GET_MODE_SIZE (<VEL>mode)) is in the range 1..15, so element zero is
;; excluded.  Alternatives, in order: UMOV into a general register, DUP into
;; a vector register, and ST1 of the lane to memory.  Before printing,
;; operand 1 is re-created as a <V128>mode REG with the same register
;; number.
1318 | (define_insn "*vec_extract<mode><Vel>_v128" | |
1319 | [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
1320 | (vec_select:<VEL> | |
1321 | (match_operand:SVE_ALL 1 "register_operand" "w, w, w") | |
1322 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
1323 | "TARGET_SVE | |
1324 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)" | |
43cacb12 | 1325 | { |
915d28fe RS |
1326 | operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); |
1327 | switch (which_alternative) | |
1328 | { | |
1329 | case 0: | |
1330 | return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | |
1331 | case 1: | |
1332 | return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; | |
1333 | case 2: | |
1334 | return "st1\\t{%1.<Vetype>}[%2], %0"; | |
1335 | default: | |
1336 | gcc_unreachable (); | |
1337 | } | |
43cacb12 | 1338 | } |
915d28fe | 1339 | [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] |
43cacb12 RS |
1340 | ) |
1341 | ||
915d28fe RS |
1342 | ;; Extract an element in the range of DUP. This pattern allows the |
1343 | ;; source and destination to be different. | |
;; Matches when the byte offset of the element (index times
;; GET_MODE_SIZE (<VEL>mode)) is in the range 16..63.  Before printing,
;; operand 0 is re-created as a <MODE>mode REG with the same register
;; number, so the template names the destination as a full vector.
1344 | (define_insn "*vec_extract<mode><Vel>_dup" | |
1345 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
1346 | (vec_select:<VEL> | |
1347 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
1348 | (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
1349 | "TARGET_SVE | |
1350 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)" | |
1351 | { | |
1352 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
1353 | return "dup\t%0.<Vetype>, %1.<Vetype>[%2]"; | |
1354 | } | |
43cacb12 RS |
1355 | ) |
1356 | ||
915d28fe RS |
1357 | ;; Extract an element outside the range of DUP. The first alternative ties | |
1358 | ;; the source to the destination; the second copies it with MOVPRFX first. | |
;; Matches when the byte offset of the element (index times
;; GET_MODE_SIZE (<VEL>mode)) is at least 64.  Before printing, operand 0 is
;; re-created as a <MODE>mode REG and operand 2 is replaced by that byte
;; offset.  Alternative 1 (source tied to the destination) emits a single
;; EXT; alternative 2 (earlyclobber destination, separate source) emits
;; MOVPRFX followed by EXT and is flagged through the "movprfx" attribute.
1359 | (define_insn "*vec_extract<mode><Vel>_ext" | |
06b3ba23 | 1360 | [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w") |
915d28fe | 1361 | (vec_select:<VEL> |
06b3ba23 | 1362 | (match_operand:SVE_ALL 1 "register_operand" "0, w") |
915d28fe RS |
1363 | (parallel [(match_operand:SI 2 "const_int_operand")])))] |
1364 | "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64" | |
1365 | { | |
1366 | operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
1367 | operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode)); | |
06b3ba23 RS |
1368 | return (which_alternative == 0 |
1369 | ? "ext\t%0.b, %0.b, %0.b, #%2" | |
1370 | : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2"); | |
915d28fe | 1371 | } |
06b3ba23 | 1372 | [(set_attr "movprfx" "*,yes")] |
43cacb12 RS |
1373 | ) |
1373 | ) |
1374 | ||
915d28fe RS |
1375 | ;; ------------------------------------------------------------------------- |
1376 | ;; ---- [INT,FP] Extract active element | |
1377 | ;; ------------------------------------------------------------------------- | |
1378 | ;; Includes: | |
1379 | ;; - LASTB | |
1380 | ;; ------------------------------------------------------------------------- | |
1381 | ||
1382 | ;; Extract into operand 0 the last element of operand 2 active under operand 1. | |
1383 | ;; If no elements are active, extract the last inactive element instead. | |
;; Operand 1 is the <VPRED> predicate and operand 2 the vector. The two
;; alternatives differ only in the destination constraint ("r" versus "w")
;; and in how operand 0 is printed (%<vwcore>0 versus %<Vetype>0).
1384 | (define_insn "extract_last_<mode>" | |
1385 | [(set (match_operand:<VEL> 0 "register_operand" "=r, w") | |
1386 | (unspec:<VEL> | |
1387 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1388 | (match_operand:SVE_ALL 2 "register_operand" "w, w")] | |
1389 | UNSPEC_LASTB))] | |
43cacb12 | 1390 | "TARGET_SVE" |
915d28fe RS |
1391 | "@ |
1392 | lastb\t%<vwcore>0, %1, %2.<Vetype> | |
1393 | lastb\t%<Vetype>0, %1, %2.<Vetype>" | |
43cacb12 RS |
1394 | ) |
1395 | ||
915d28fe RS |
1396 | ;; ------------------------------------------------------------------------- |
1397 | ;; ---- [PRED] Extract index | |
1398 | ;; ------------------------------------------------------------------------- | |
1399 | ;; The patterns in this section are synthetic. | |
1400 | ;; ------------------------------------------------------------------------- | |
1401 | ||
1402 | ;; Handle extractions from a predicate by converting to an integer vector | |
1403 | ;; and extracting from there. | |
;; Operand 1 is the predicate and operand 2 the element index. The
;; predicate is first converted to a <MODE>mode vector holding 1 in active
;; lanes and 0 elsewhere, and the element is then extracted from that
;; vector. vcond_mask takes its operands in the order (destination, true
;; value, false value, mask), so the predicate must be passed last.
1404 | (define_expand "vec_extract<vpred><Vel>" | |
1405 | [(match_operand:<VEL> 0 "register_operand") | |
1406 | (match_operand:<VPRED> 1 "register_operand") | |
1407 | (match_operand:SI 2 "nonmemory_operand") | |
1408 | ;; Dummy operand to which we can attach the iterator. | |
1409 | (reg:SVE_I V0_REGNUM)] | |
43cacb12 | 1410 | "TARGET_SVE" |
915d28fe RS |
1411 | { |
1412 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
d29f7dd5 RS |
1413 | emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode), |
1414 | CONST0_RTX (<MODE>mode), | |
1415 | operands[1])); | |
915d28fe RS |
1416 | emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); |
1417 | DONE; | |
1418 | } | |
43cacb12 RS |
1419 | ) |
1420 | ||
915d28fe RS |
1421 | ;; ========================================================================= |
1422 | ;; == Unary arithmetic | |
1423 | ;; ========================================================================= | |
1424 | ||
1425 | ;; ------------------------------------------------------------------------- | |
1426 | ;; ---- [INT] General unary arithmetic corresponding to rtx codes | |
1427 | ;; ------------------------------------------------------------------------- | |
1428 | ;; Includes: | |
1429 | ;; - ABS | |
bca5a997 RS |
1430 | ;; - CLS (= clrsb) |
1431 | ;; - CLZ | |
915d28fe RS |
1432 | ;; - CNT (= popcount) |
1433 | ;; - NEG | |
1434 | ;; - NOT | |
1435 | ;; ------------------------------------------------------------------------- | |
1436 | ||
1437 | ;; Unpredicated integer unary arithmetic. | |
;; Operand 2 is not supplied by the caller: the preparation code sets it
;; to the <VPRED>mode register returned by aarch64_ptrue_reg, and the
;; match_dup inserts it as the predicate of the UNSPEC_PRED_X.
1438 | (define_expand "<optab><mode>2" | |
1439 | [(set (match_operand:SVE_I 0 "register_operand") | |
1440 | (unspec:SVE_I | |
1441 | [(match_dup 2) | |
1442 | (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] | |
06308276 | 1443 | UNSPEC_PRED_X))] |
43cacb12 | 1444 | "TARGET_SVE" |
915d28fe RS |
1445 | { |
1446 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
1447 | } | |
43cacb12 RS |
1448 | ) |
1449 | ||
915d28fe RS |
1450 | ;; Integer unary arithmetic predicated with a PTRUE. |
;; Operand 1 is the predicate and operand 2 the input vector. There is a
;; single alternative, printed with the /m qualifier on the predicate.
1451 | (define_insn "*<optab><mode>2" | |
1452 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1453 | (unspec:SVE_I | |
1454 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1455 | (SVE_INT_UNARY:SVE_I | |
1456 | (match_operand:SVE_I 2 "register_operand" "w"))] | |
06308276 | 1457 | UNSPEC_PRED_X))] |
43cacb12 | 1458 | "TARGET_SVE" |
915d28fe | 1459 | "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" |
43cacb12 RS |
1460 | ) |
1461 | ||
3c9f4963 RS |
1462 | ;; Predicated integer unary arithmetic, merging with the first input. |
;; The fallback value of the UNSPEC_SEL is a match_dup of operand 2.
;; Alternative 0 ties operand 2 to the destination and operates in place;
;; alternative 1 accepts a separate input register and prefixes the
;; operation with a MOVPRFX from operand 2.
1463 | (define_insn "*cond_<optab><mode>_2" | |
1464 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1465 | (unspec:SVE_I | |
1466 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1467 | (SVE_INT_UNARY:SVE_I | |
1468 | (match_operand:SVE_I 2 "register_operand" "0, w")) | |
1469 | (match_dup 2)] | |
1470 | UNSPEC_SEL))] | |
1471 | "TARGET_SVE" | |
1472 | "@ | |
1473 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype> | |
1474 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1475 | [(set_attr "movprfx" "*,yes")] | |
1476 | ) | |
1477 | ||
1478 | ;; Predicated integer unary arithmetic, merging with an independent value. | |
1479 | ;; | |
1480 | ;; The earlyclobber isn't needed for the first alternative, but omitting | |
1481 | ;; it would only help the case in which operands 2 and 3 are the same, | |
1482 | ;; which is handled above rather than here. Marking all the alternatives | |
1483 | ;; as earlyclobber helps to make the instruction more regular to the | |
1484 | ;; register allocator. | |
;; Operand 3 is the fallback value. Alternative 0 ties it to the
;; destination, so one instruction suffices. Alternative 1 matches it
;; against Dz and emits a predicated MOVPRFX with /z first. Alternative 2
;; takes it in a separate register and copies it into the destination with
;; an unpredicated MOVPRFX first. The C condition rejects the case where
;; operands 2 and 3 are equal.
1485 | (define_insn "*cond_<optab><mode>_any" | |
1486 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") | |
1487 | (unspec:SVE_I | |
1488 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1489 | (SVE_INT_UNARY:SVE_I | |
1490 | (match_operand:SVE_I 2 "register_operand" "w, w, w")) | |
1491 | (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
1492 | UNSPEC_SEL))] | |
1493 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" | |
1494 | "@ | |
1495 | <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1496 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1497 | movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1498 | [(set_attr "movprfx" "*,yes,yes")] | |
1499 | ) | |
1500 | ||
d7a09c44 RS |
1501 | ;; ------------------------------------------------------------------------- |
1502 | ;; ---- [INT] General unary arithmetic corresponding to unspecs | |
1503 | ;; ------------------------------------------------------------------------- | |
1504 | ;; Includes: | |
1505 | ;; - REVB | |
1506 | ;; - REVH | |
1507 | ;; - REVW | |
1508 | ;; ------------------------------------------------------------------------- | |
1509 | ||
1510 | ;; Predicated integer unary operations. | |
;; Here SVE_INT_UNARY iterates over unspecs rather than rtx codes, so the
;; operation is an inner unspec wrapping operand 2. The extra C condition
;; only enables the pattern when the mode's <elem_bits> is at least the
;; operation's <min_elem_bits>.
1511 | (define_insn "@aarch64_pred_<optab><mode>" | |
1512 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1513 | (unspec:SVE_I | |
1514 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1515 | (unspec:SVE_I | |
1516 | [(match_operand:SVE_I 2 "register_operand" "w")] | |
1517 | SVE_INT_UNARY)] | |
1518 | UNSPEC_PRED_X))] | |
1519 | "TARGET_SVE && <elem_bits> >= <min_elem_bits>" | |
1520 | "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1521 | ) | |
1522 | ||
d113ece6 RS |
1523 | ;; ------------------------------------------------------------------------- |
1524 | ;; ---- [INT] Zero extension | |
1525 | ;; ------------------------------------------------------------------------- | |
1526 | ;; Includes: | |
1527 | ;; - UXTB | |
1528 | ;; - UXTH | |
1529 | ;; - UXTW | |
1530 | ;; ------------------------------------------------------------------------- | |
1531 | ||
1532 | ;; Match UXT[BHW] as a conditional AND of a constant, merging with the | |
1533 | ;; first input. | |
;; Operand 3 is the AND mask. It has no constraint string and reaches the
;; output only through %e3, which supplies the suffix of the UXT mnemonic.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into the destination with MOVPRFX first.
1534 | (define_insn "*cond_uxt<mode>_2" | |
1535 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1536 | (unspec:SVE_I | |
1537 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1538 | (and:SVE_I | |
1539 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
1540 | (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) | |
1541 | (match_dup 2)] | |
1542 | UNSPEC_SEL))] | |
1543 | "TARGET_SVE" | |
1544 | "@ | |
1545 | uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype> | |
1546 | movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1547 | [(set_attr "movprfx" "*,yes")] | |
1548 | ) | |
1549 | ||
1550 | ;; Match UXT[BHW] as a conditional AND of a constant, merging with an | |
1551 | ;; independent value. | |
1552 | ;; | |
1553 | ;; The earlyclobber isn't needed for the first alternative, but omitting | |
1554 | ;; it would only help the case in which operands 2 and 4 are the same, | |
1555 | ;; which is handled above rather than here. Marking all the alternatives | |
1556 | ;; as earlyclobber helps to make the instruction more regular to the | |
1557 | ;; register allocator. | |
;; Operand 3 is the AND mask (printed only via %e3) and operand 4 the
;; fallback value. Alternative 0 ties operand 4 to the destination.
;; Alternative 1 matches it against Dz and emits a predicated MOVPRFX with
;; /z first. Alternative 2 copies it from a separate register with an
;; unpredicated MOVPRFX. The C condition rejects equal operands 2 and 4.
1558 | (define_insn "*cond_uxt<mode>_any" | |
1559 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") | |
1560 | (unspec:SVE_I | |
1561 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1562 | (and:SVE_I | |
1563 | (match_operand:SVE_I 2 "register_operand" "w, w, w") | |
1564 | (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) | |
1565 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
1566 | UNSPEC_SEL))] | |
1567 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
1568 | "@ | |
1569 | uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1570 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1571 | movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1572 | [(set_attr "movprfx" "*,yes,yes")] | |
1573 | ) | |
1574 | ||
e0a0be93 RS |
1575 | ;; ------------------------------------------------------------------------- |
1576 | ;; ---- [INT] Logical inverse | |
1577 | ;; ------------------------------------------------------------------------- | |
1578 | ||
1579 | ;; Predicated logical inverse. | |
;; Matches a select between operand 4 (accepted by aarch64_simd_imm_one)
;; and operand 3 (accepted by aarch64_simd_imm_zero), controlled by a
;; predicated equality comparison of operand 2 with operand 3. Operand 5
;; is the flag element of the UNSPEC_PRED_Z. Only operands 0, 1 and 2
;; appear in the output template.
1580 | (define_insn "*cnot<mode>" | |
1581 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1582 | (unspec:SVE_I | |
1583 | [(unspec:<VPRED> | |
1584 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1585 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
1586 | (eq:<VPRED> | |
1587 | (match_operand:SVE_I 2 "register_operand" "w") | |
1588 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] | |
1589 | UNSPEC_PRED_Z) | |
1590 | (match_operand:SVE_I 4 "aarch64_simd_imm_one") | |
1591 | (match_dup 3)] | |
1592 | UNSPEC_SEL))] | |
1593 | "TARGET_SVE" | |
1594 | "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1595 | ) | |
1596 | ||
1597 | ;; Predicated logical inverse, merging with the first input. | |
;; Operand 5 is the predicate of the inner comparison. It is matched with
;; no mode or predicate and is never printed. The rewrite step replaces it
;; with CONSTM1_RTX (<VPRED>mode) whenever it is not already a constant.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into the destination with MOVPRFX first.
1598 | (define_insn_and_rewrite "*cond_cnot<mode>_2" | |
1599 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1600 | (unspec:SVE_I | |
1601 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1602 | ;; Logical inverse of operand 2 (as above). | |
1603 | (unspec:SVE_I | |
1604 | [(unspec:<VPRED> | |
1605 | [(match_operand 5) | |
1606 | (const_int SVE_KNOWN_PTRUE) | |
1607 | (eq:<VPRED> | |
1608 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
1609 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] | |
1610 | UNSPEC_PRED_Z) | |
1611 | (match_operand:SVE_I 4 "aarch64_simd_imm_one") | |
1612 | (match_dup 3)] | |
1613 | UNSPEC_SEL) | |
1614 | (match_dup 2)] | |
1615 | UNSPEC_SEL))] | |
1616 | "TARGET_SVE" | |
1617 | "@ | |
1618 | cnot\t%0.<Vetype>, %1/m, %0.<Vetype> | |
1619 | movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1620 | "&& !CONSTANT_P (operands[5])" | |
1621 | { | |
1622 | operands[5] = CONSTM1_RTX (<VPRED>mode); | |
1623 | } | |
1624 | [(set_attr "movprfx" "*,yes")] | |
1625 | ) | |
1626 | ||
1627 | ;; Predicated logical inverse, merging with an independent value. | |
1628 | ;; | |
1629 | ;; The earlyclobber isn't needed for the first alternative, but omitting | |
1630 | ;; it would only help the case in which operands 2 and 6 are the same, | |
1631 | ;; which is handled above rather than here. Marking all the alternatives | |
1632 | ;; as earlyclobber helps to make the instruction more regular to the | |
1633 | ;; register allocator. | |
;; Operand 6 is the fallback value and operand 5 the (unprinted) predicate
;; of the inner comparison, which the rewrite step replaces with
;; CONSTM1_RTX (<VPRED>mode) if it is not already constant. Alternative 0
;; ties operand 6 to the destination. Alternative 1 matches it against Dz
;; and emits a predicated MOVPRFX with /z. Alternative 2 copies it from a
;; separate register with MOVPRFX. The C condition rejects equal operands
;; 2 and 6.
1634 | (define_insn_and_rewrite "*cond_cnot<mode>_any" | |
1635 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") | |
1636 | (unspec:SVE_I | |
1637 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1638 | ;; Logical inverse of operand 2 (as above). | |
1639 | (unspec:SVE_I | |
1640 | [(unspec:<VPRED> | |
1641 | [(match_operand 5) | |
1642 | (const_int SVE_KNOWN_PTRUE) | |
1643 | (eq:<VPRED> | |
1644 | (match_operand:SVE_I 2 "register_operand" "w, w, w") | |
1645 | (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] | |
1646 | UNSPEC_PRED_Z) | |
1647 | (match_operand:SVE_I 4 "aarch64_simd_imm_one") | |
1648 | (match_dup 3)] | |
1649 | UNSPEC_SEL) | |
1650 | (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
1651 | UNSPEC_SEL))] | |
1652 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" | |
1653 | "@ | |
1654 | cnot\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1655 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1656 | movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1657 | "&& !CONSTANT_P (operands[5])" | |
1658 | { | |
1659 | operands[5] = CONSTM1_RTX (<VPRED>mode); | |
1660 | } | |
1661 | [(set_attr "movprfx" "*,yes,yes")] | |
1662 | ) | |
1663 | ||
915d28fe | 1664 | ;; ------------------------------------------------------------------------- |
d45b20a5 | 1665 | ;; ---- [FP] General unary arithmetic corresponding to unspecs |
915d28fe RS |
1666 | ;; ------------------------------------------------------------------------- |
1667 | ;; Includes: | |
1668 | ;; - FABS | |
1669 | ;; - FNEG | |
915d28fe RS |
1670 | ;; - FRINTA |
1671 | ;; - FRINTI | |
1672 | ;; - FRINTM | |
1673 | ;; - FRINTN | |
1674 | ;; - FRINTP | |
1675 | ;; - FRINTX | |
1676 | ;; - FRINTZ | |
d45b20a5 | 1677 | ;; - FSQRT |
915d28fe RS |
1678 | ;; ------------------------------------------------------------------------- |
1679 | ||
d45b20a5 RS |
1680 | ;; Unpredicated floating-point unary operations. |
;; Operand 2 is filled in by the preparation code with the <VPRED>mode
;; register from aarch64_ptrue_reg. The second element of the unspec is
;; fixed to SVE_RELAXED_GP.
1681 | (define_expand "<optab><mode>2" | |
915d28fe RS |
1682 | [(set (match_operand:SVE_F 0 "register_operand") |
1683 | (unspec:SVE_F | |
1684 | [(match_dup 2) | |
c9c5a809 | 1685 | (const_int SVE_RELAXED_GP) |
d45b20a5 RS |
1686 | (match_operand:SVE_F 1 "register_operand")] |
1687 | SVE_COND_FP_UNARY))] | |
915d28fe RS |
1688 | "TARGET_SVE" |
1689 | { | |
1690 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
1691 | } | |
1692 | ) | |
1693 | ||
d45b20a5 RS |
1694 | ;; Predicated floating-point unary operations. |
;; Operand 1 is the predicate, operand 2 the input vector and operand 3
;; the aarch64_sve_gp_strictness value. Operand 3 has no constraint and
;; does not appear in the output template.
1695 | (define_insn "*<optab><mode>2" | |
915d28fe RS |
1696 | [(set (match_operand:SVE_F 0 "register_operand" "=w") |
1697 | (unspec:SVE_F | |
1698 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
c9c5a809 | 1699 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
d45b20a5 RS |
1700 | (match_operand:SVE_F 2 "register_operand" "w")] |
1701 | SVE_COND_FP_UNARY))] | |
915d28fe | 1702 | "TARGET_SVE" |
d45b20a5 | 1703 | "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" |
915d28fe RS |
1704 | ) |
1705 | ||
b21f7d53 RS |
1706 | ;; Predicated floating-point unary arithmetic, merging with the first input. |
;; Operand 3 is the predicate of the inner operation and operand 1 the
;; predicate of the select. The insn condition requires
;; aarch64_sve_pred_dominates_p (&operands[3], operands[1]), and the
;; rewrite step replaces operand 3 with a copy of operand 1 when the two
;; differ. Alternative 0 ties operand 2 to the destination; alternative 1
;; copies operand 2 into the destination with MOVPRFX first.
1707 | (define_insn_and_rewrite "*cond_<optab><mode>_2" | |
1708 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
1709 | (unspec:SVE_F | |
1710 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1711 | (unspec:SVE_F | |
1712 | [(match_operand 3) | |
1713 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
1714 | (match_operand:SVE_F 2 "register_operand" "0, w")] | |
1715 | SVE_COND_FP_UNARY) | |
1716 | (match_dup 2)] | |
1717 | UNSPEC_SEL))] | |
1718 | "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])" | |
1719 | "@ | |
1720 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype> | |
1721 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1722 | "&& !rtx_equal_p (operands[1], operands[3])" | |
1723 | { | |
1724 | operands[3] = copy_rtx (operands[1]); | |
1725 | } | |
1726 | [(set_attr "movprfx" "*,yes")] | |
1727 | ) | |
1728 | ||
1729 | ;; Predicated floating-point unary arithmetic, merging with an independent | |
1730 | ;; value. | |
1731 | ;; | |
1732 | ;; The earlyclobber isn't needed for the first alternative, but omitting | |
1733 | ;; it would only help the case in which operands 2 and 3 are the same, | |
1734 | ;; which is handled above rather than here. Marking all the alternatives | |
1735 | ;; as earlyclobber helps to make the instruction more regular to the | |
1736 | ;; register allocator. | |
;; Operand 3 is the fallback value and operand 4 the predicate of the
;; inner operation. The insn condition rejects equal operands 2 and 3 and
;; requires aarch64_sve_pred_dominates_p (&operands[4], operands[1]); the
;; rewrite step replaces operand 4 with a copy of operand 1 when the two
;; differ. Alternative 0 ties operand 3 to the destination. Alternative 1
;; matches it against Dz and emits a predicated MOVPRFX with /z.
;; Alternative 2 copies it from a separate register with MOVPRFX.
1737 | (define_insn_and_rewrite "*cond_<optab><mode>_any" | |
1738 | [(set (match_operand:SVE_F 0 "register_operand" "=&w, ?&w, ?&w") | |
1739 | (unspec:SVE_F | |
1740 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1741 | (unspec:SVE_F | |
1742 | [(match_operand 4) | |
1743 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
1744 | (match_operand:SVE_F 2 "register_operand" "w, w, w")] | |
1745 | SVE_COND_FP_UNARY) | |
1746 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
1747 | UNSPEC_SEL))] | |
1748 | "TARGET_SVE | |
1749 | && !rtx_equal_p (operands[2], operands[3]) | |
1750 | && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" | |
1751 | "@ | |
1752 | <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1753 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> | |
1754 | movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1755 | "&& !rtx_equal_p (operands[1], operands[4])" | |
1756 | { | |
1757 | operands[4] = copy_rtx (operands[1]); | |
1758 | } | |
1759 | [(set_attr "movprfx" "*,yes,yes")] | |
1760 | ) | |
1761 | ||
915d28fe RS |
1762 | ;; ------------------------------------------------------------------------- |
1763 | ;; ---- [PRED] Inverse | |
1764 | ;; ------------------------------------------------------------------------- | |
1765 | ;; Includes: | |
1766 | ;; - NOT | |
1767 | ;; ------------------------------------------------------------------------- | |
1768 | ||
1769 | ;; Unpredicated predicate inverse. | |
;; The inverse is expressed as (and (not operand 1) operand 2), where
;; operand 2 is set by the preparation code to the <MODE>mode register
;; returned by aarch64_ptrue_reg.
1770 | (define_expand "one_cmpl<mode>2" | |
1771 | [(set (match_operand:PRED_ALL 0 "register_operand") | |
1772 | (and:PRED_ALL | |
1773 | (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) | |
1774 | (match_dup 2)))] | |
1775 | "TARGET_SVE" | |
1776 | { | |
1777 | operands[2] = aarch64_ptrue_reg (<MODE>mode); | |
1778 | } | |
1779 | ) | |
1780 | ||
1781 | ;; Predicated predicate inverse. | |
;; Note the operand numbering: operand 2 is the predicate being inverted
;; and operand 1 is the one it is ANDed with, which the template prints as
;; the governing predicate (%1/z).
1782 | (define_insn "*one_cmpl<mode>3" | |
1783 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1784 | (and:PRED_ALL | |
1785 | (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1786 | (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1787 | "TARGET_SVE" | |
1788 | "not\t%0.b, %1/z, %2.b" | |
1789 | ) | |
1790 | ||
1791 | ;; ========================================================================= | |
1792 | ;; == Binary arithmetic | |
1793 | ;; ========================================================================= | |
1794 | ||
1795 | ;; ------------------------------------------------------------------------- | |
1796 | ;; ---- [INT] General binary arithmetic corresponding to rtx codes | |
1797 | ;; ------------------------------------------------------------------------- | |
f8c22a8b RS |
1798 | ;; Includes: |
1799 | ;; - ADD (merging form only) | |
1800 | ;; - AND (merging form only) | |
20103c0e | 1801 | ;; - ASR (merging form only) |
f8c22a8b | 1802 | ;; - EOR (merging form only) |
20103c0e RS |
1803 | ;; - LSL (merging form only) |
1804 | ;; - LSR (merging form only) | |
915d28fe | 1805 | ;; - MUL |
f8c22a8b | 1806 | ;; - ORR (merging form only) |
915d28fe RS |
1807 | ;; - SMAX |
1808 | ;; - SMIN | |
f8c22a8b | 1809 | ;; - SUB (merging form only) |
915d28fe RS |
1810 | ;; - UMAX |
1811 | ;; - UMIN | |
1812 | ;; ------------------------------------------------------------------------- | |
1813 | ||
f8c22a8b RS |
1814 | ;; Unpredicated integer binary operations that have an immediate form. |
;; Operand 2 may be anything accepted by the per-operation predicate
;; aarch64_sve_<sve_imm_con>_operand. Operand 3 is the predicate, set by
;; the preparation code to the <VPRED>mode register from aarch64_ptrue_reg.
1815 | (define_expand "<optab><mode>3" | |
1816 | [(set (match_operand:SVE_I 0 "register_operand") | |
1817 | (unspec:SVE_I | |
1818 | [(match_dup 3) | |
1819 | (SVE_INT_BINARY_IMM:SVE_I | |
1820 | (match_operand:SVE_I 1 "register_operand") | |
1821 | (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))] | |
1822 | UNSPEC_PRED_X))] | |
1823 | "TARGET_SVE" | |
1824 | { | |
1825 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
1826 | } | |
1827 | ) | |
1828 | ||
1829 | ;; Integer binary operations that have an immediate form, predicated | |
1830 | ;; with a PTRUE. We don't actually need the predicate for the first | |
1831 | ;; and third alternatives, but using Upa or X isn't likely to gain much | |
1832 | ;; and would make the instruction seem less uniform to the register | |
1833 | ;; allocator. | |
;; Alternatives 0 and 2 take operand 3 through the <sve_imm_con>
;; constraint and output "#", so they are always split. The split runs
;; after reload when operand 3 is not a register and produces the same
;; operation without the predicate wrapper. Alternatives 1 and 3 take
;; operand 3 in a register and print the predicated instruction;
;; alternative 3 copies operand 2 into the destination with MOVPRFX first.
1834 | (define_insn_and_split "*<optab><mode>3" | |
1835 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w") | |
1836 | (unspec:SVE_I | |
1837 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
1838 | (SVE_INT_BINARY_IMM:SVE_I | |
1839 | (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w") | |
1840 | (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))] | |
1841 | UNSPEC_PRED_X))] | |
1842 | "TARGET_SVE" | |
1843 | "@ | |
1844 | # | |
1845 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1846 | # | |
1847 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1848 | ; Split the unpredicated form after reload, so that we don't have | |
1849 | ; the unnecessary PTRUE. | |
1850 | "&& reload_completed | |
1851 | && !register_operand (operands[3], <MODE>mode)" | |
1852 | [(set (match_dup 0) (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))] | |
1853 | "" | |
1854 | [(set_attr "movprfx" "*,*,yes,yes")] | |
1855 | ) | |
1856 | ||
1857 | ;; Unpredicated binary operations with a constant (post-RA only). | |
1858 | ;; These are generated by splitting a predicated instruction whose | |
1859 | ;; predicate is unused. | |
;; Only enabled once reload_completed is set. Operand 2 must satisfy the
;; per-operation immediate predicate and is printed with the
;; <sve_imm_prefix> modifier. Alternative 0 ties operand 1 to the
;; destination; alternative 1 copies it in with MOVPRFX first.
1860 | (define_insn "*post_ra_<optab><mode>3" | |
1861 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1862 | (SVE_INT_BINARY_IMM:SVE_I | |
1863 | (match_operand:SVE_I 1 "register_operand" "0, w") | |
1864 | (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))] | |
1865 | "TARGET_SVE && reload_completed" | |
1866 | "@ | |
1867 | <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2 | |
1868 | movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2" | |
1869 | [(set_attr "movprfx" "*,yes")] | |
1870 | ) | |
1871 | ||
915d28fe | 1872 | ;; Predicated integer operations with merging. |
;; This expander has no preparation code, so the pattern is emitted as
;; written: operand 1 is the predicate, operands 2 and 3 the inputs and
;; operand 4 the fallback value, which may be a register or zero.
b6c3aea1 | 1873 | (define_expand "@cond_<optab><mode>" |
915d28fe RS |
1874 | [(set (match_operand:SVE_I 0 "register_operand") |
1875 | (unspec:SVE_I | |
1876 | [(match_operand:<VPRED> 1 "register_operand") | |
1877 | (SVE_INT_BINARY:SVE_I | |
1878 | (match_operand:SVE_I 2 "register_operand") | |
d113ece6 | 1879 | (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>")) |
915d28fe RS |
1880 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] |
1881 | UNSPEC_SEL))] | |
1882 | "TARGET_SVE" | |
1883 | ) | |
1884 | ||
1885 | ;; Predicated integer operations, merging with the first input. | |
;; The fallback value is a match_dup of operand 2. Alternative 0 ties
;; operand 2 to the destination; alternative 1 copies operand 2 into the
;; destination with MOVPRFX before the operation.
1886 | (define_insn "*cond_<optab><mode>_2" | |
1887 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1888 | (unspec:SVE_I | |
1889 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1890 | (SVE_INT_BINARY:SVE_I | |
1891 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
1892 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
1893 | (match_dup 2)] | |
1894 | UNSPEC_SEL))] | |
1895 | "TARGET_SVE" | |
1896 | "@ | |
1897 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1898 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1899 | [(set_attr "movprfx" "*,yes")] | |
1900 | ) | |
1901 | ||
1902 | ;; Predicated integer operations, merging with the second input. | |
;; The fallback value is a match_dup of operand 3, so the destination is
;; tied to (or MOVPRFXed from) operand 3 instead of operand 2. The
;; templates therefore use <sve_int_op_rev> with operand 2 as the other
;; source.
1903 | (define_insn "*cond_<optab><mode>_3" | |
1904 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1905 | (unspec:SVE_I | |
1906 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1907 | (SVE_INT_BINARY:SVE_I | |
1908 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
1909 | (match_operand:SVE_I 3 "register_operand" "0, w")) | |
1910 | (match_dup 3)] | |
1911 | UNSPEC_SEL))] | |
1912 | "TARGET_SVE" | |
1913 | "@ | |
1914 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1915 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1916 | [(set_attr "movprfx" "*,yes")] | |
1917 | ) | |
1918 | ||
1919 | ;; Predicated integer operations, merging with an independent value. | |
;; Operand 4 is the fallback value; the insn condition rejects it being
;; equal to operand 2 or operand 3.
;; Alternatives 0-2: operand 4 matches Dz. A predicated MOVPRFX with /z is
;; emitted from the destination itself (operand 2 or 3 tied to it, with
;; alternative 1 using <sve_int_op_rev>) or from operand 2.
;; Alternative 3: operand 4 is tied to the destination and a MOVPRFX with
;; /m from operand 2 precedes the operation.
;; Alternative 4: operand 4 is in a separate register and the output is
;; "#". After reload, if operand 4 is a register other than operand 0,
;; the rewrite code emits a vcond_mask into operand 0 (operand 2 as true
;; value, operand 4 as false value, operand 1 as mask) and then
;; redirects operands 2 and 4 to operand 0.
1920 | (define_insn_and_rewrite "*cond_<optab><mode>_any" | |
1921 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
1922 | (unspec:SVE_I | |
1923 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
1924 | (SVE_INT_BINARY:SVE_I | |
1925 | (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") | |
1926 | (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w")) | |
1927 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
1928 | UNSPEC_SEL))] | |
43cacb12 | 1929 | "TARGET_SVE |
915d28fe RS |
1930 | && !rtx_equal_p (operands[2], operands[4]) |
1931 | && !rtx_equal_p (operands[3], operands[4])" | |
1932 | "@ | |
1933 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1934 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1935 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1936 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1937 | #" | |
1938 | "&& reload_completed | |
1939 | && register_operand (operands[4], <MODE>mode) | |
1940 | && !rtx_equal_p (operands[0], operands[4])" | |
43cacb12 | 1941 | { |
915d28fe RS |
1942 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], |
1943 | operands[4], operands[1])); | |
1944 | operands[4] = operands[2] = operands[0]; | |
43cacb12 | 1945 | } |
915d28fe | 1946 | [(set_attr "movprfx" "yes")] |
43cacb12 RS |
1947 | ) |
1948 | ||
915d28fe RS |
1949 | ;; ------------------------------------------------------------------------- |
1950 | ;; ---- [INT] Addition | |
1951 | ;; ------------------------------------------------------------------------- | |
1952 | ;; Includes: | |
1953 | ;; - ADD | |
1954 | ;; - DECB | |
1955 | ;; - DECD | |
1956 | ;; - DECH | |
1957 | ;; - DECW | |
1958 | ;; - INCB | |
1959 | ;; - INCD | |
1960 | ;; - INCH | |
1961 | ;; - INCW | |
1962 | ;; - SUB | |
1963 | ;; ------------------------------------------------------------------------- | |
1964 | ||
;; Unpredicated vector addition, six alternatives:
;; 0, 1 - operand 1 tied to the destination; ADD of %D2 or SUB of %N2.
;; 2 - operand 1 tied to the destination; the text comes from
;; aarch64_output_sve_vector_inc_dec.
;; 3, 4 - as 0 and 1, but operand 1 is copied into the destination with
;; MOVPRFX first.
;; 5 - register-register ADD.
43cacb12 | 1965 | (define_insn "add<mode>3" |
5e176a61 | 1966 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w") |
43cacb12 | 1967 | (plus:SVE_I |
5e176a61 RS |
1968 | (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w") |
1969 | (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))] | |
43cacb12 RS |
1970 | "TARGET_SVE" |
1971 | "@ | |
1972 | add\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
1973 | sub\t%0.<Vetype>, %0.<Vetype>, #%N2 | |
0fdc30bc | 1974 | * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]); |
5e176a61 RS |
1975 | movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2 |
1976 | movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2 | |
43cacb12 | 1977 | add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
5e176a61 | 1978 | [(set_attr "movprfx" "*,*,*,yes,yes,*")] |
43cacb12 RS |
1979 | ) |
1980 | ||
915d28fe RS |
1981 | ;; Merging forms are handled through SVE_INT_BINARY. |
1982 | ||
1983 | ;; ------------------------------------------------------------------------- | |
1984 | ;; ---- [INT] Subtraction | |
1985 | ;; ------------------------------------------------------------------------- | |
1986 | ;; Includes: | |
1987 | ;; - SUB | |
1988 | ;; - SUBR | |
1989 | ;; ------------------------------------------------------------------------- | |
1990 | ||
;; Unpredicated vector subtraction. Alternative 0 is the register-register
;; SUB. In alternatives 1 and 2 the minuend (operand 1) matches "vsa" and
;; the result is printed as SUBR of that immediate from operand 2, which
;; is either tied to the destination or copied into it with MOVPRFX.
43cacb12 | 1991 | (define_insn "sub<mode>3" |
5e176a61 | 1992 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
43cacb12 | 1993 | (minus:SVE_I |
5e176a61 RS |
1994 | (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa") |
1995 | (match_operand:SVE_I 2 "register_operand" "w, 0, w")))] | |
43cacb12 RS |
1996 | "TARGET_SVE" |
1997 | "@ | |
1998 | sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> | |
5e176a61 RS |
1999 | subr\t%0.<Vetype>, %0.<Vetype>, #%D1 |
2000 | movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1" | |
2001 | [(set_attr "movprfx" "*,*,yes")] | |
43cacb12 RS |
2002 | ) |
2003 | ||
915d28fe RS |
2004 | ;; Merging forms are handled through SVE_INT_BINARY. |
2005 | ||
a229966c RS |
2006 | ;; ------------------------------------------------------------------------- |
2007 | ;; ---- [INT] Take address | |
2008 | ;; ------------------------------------------------------------------------- | |
2009 | ;; Includes: | |
2010 | ;; - ADR | |
2011 | ;; ------------------------------------------------------------------------- | |
2012 | ||
2013 | ;; Unshifted ADR, with the offset being zero-extended from the low 32 bits. | |
;; Operand 1 is the base and operand 2 the offset. Operand 3 (the AND
;; mask) has no constraint and is not printed; the template expresses the
;; masking through the uxtw modifier instead.
2014 | (define_insn "*aarch64_adr_uxtw" | |
2015 | [(set (match_operand:VNx2DI 0 "register_operand" "=w") | |
2016 | (plus:VNx2DI | |
2017 | (and:VNx2DI | |
2018 | (match_operand:VNx2DI 2 "register_operand" "w") | |
2019 | (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate")) | |
2020 | (match_operand:VNx2DI 1 "register_operand" "w")))] | |
2021 | "TARGET_SVE" | |
2022 | "adr\t%0.d, [%1.d, %2.d, uxtw]" | |
2023 | ) | |
2024 | ||
2025 | ;; ADR with a nonzero shift. | |
;; Operand 1 is the base, operand 2 the index and operand 3 the shift
;; amount, restricted by const_1_to_3_operand. Operand 4 is the predicate
;; of the UNSPEC_PRED_X around the shift; it is unconstrained and
;; unprinted, and the rewrite step replaces it with
;; CONSTM1_RTX (<VPRED>mode) when it is not already a constant.
2026 | (define_insn_and_rewrite "*aarch64_adr<mode>_shift" | |
2027 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w") | |
2028 | (plus:SVE_SDI | |
2029 | (unspec:SVE_SDI | |
2030 | [(match_operand 4) | |
2031 | (ashift:SVE_SDI | |
2032 | (match_operand:SVE_SDI 2 "register_operand" "w") | |
2033 | (match_operand:SVE_SDI 3 "const_1_to_3_operand"))] | |
2034 | UNSPEC_PRED_X) | |
2035 | (match_operand:SVE_SDI 1 "register_operand" "w")))] | |
2036 | "TARGET_SVE" | |
2037 | "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]" | |
2038 | "&& !CONSTANT_P (operands[4])" | |
2039 | { | |
2040 | operands[4] = CONSTM1_RTX (<VPRED>mode); | |
2041 | } | |
2042 | ) | |
2043 | ||
2044 | ;; Same, but with the index being zero-extended from the low 32 bits. | |
;; Operand 4 is the AND mask applied to the index before the shift; it is
;; not printed, the template using the uxtw modifier instead. Operand 5 is
;; the unprinted predicate of the UNSPEC_PRED_X, which the rewrite step
;; replaces with CONSTM1_RTX (VNx2BImode) when it is not already constant.
2045 | (define_insn_and_rewrite "*aarch64_adr_shift_uxtw" | |
2046 | [(set (match_operand:VNx2DI 0 "register_operand" "=w") | |
2047 | (plus:VNx2DI | |
2048 | (unspec:VNx2DI | |
2049 | [(match_operand 5) | |
2050 | (ashift:VNx2DI | |
2051 | (and:VNx2DI | |
2052 | (match_operand:VNx2DI 2 "register_operand" "w") | |
2053 | (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate")) | |
2054 | (match_operand:VNx2DI 3 "const_1_to_3_operand"))] | |
2055 | UNSPEC_PRED_X) | |
2056 | (match_operand:VNx2DI 1 "register_operand" "w")))] | |
2057 | "TARGET_SVE" | |
2058 | "adr\t%0.d, [%1.d, %2.d, uxtw %3]" | |
2059 | "&& !CONSTANT_P (operands[5])" | |
2060 | { | |
2061 | operands[5] = CONSTM1_RTX (VNx2BImode); | |
2062 | } | |
2063 | ) | |
2064 | ||
915d28fe RS |
2065 | ;; ------------------------------------------------------------------------- |
2066 | ;; ---- [INT] Absolute difference | |
2067 | ;; ------------------------------------------------------------------------- | |
2068 | ;; Includes: | |
2069 | ;; - SABD | |
2070 | ;; - UABD | |
2071 | ;; ------------------------------------------------------------------------- | |
2072 | ||
2073 | ;; Unpredicated integer absolute difference. | |
;; The preparation code does all the work: it obtains a <VPRED>mode
;; register from aarch64_ptrue_reg, emits the predicated
;; aarch64_<su>abd<mode>_3 pattern with it and ends with DONE, so the RTL
;; template below is never emitted itself.
;; NOTE(review): the USMAX expression appears to be present only so that
;; the code iterator applies to this expander -- confirm.
2074 | (define_expand "<su>abd<mode>_3" | |
2075 | [(use (match_operand:SVE_I 0 "register_operand")) | |
2076 | (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand") | |
2077 | (match_operand:SVE_I 2 "register_operand"))] | |
2078 | "TARGET_SVE" | |
2079 | { | |
2080 | rtx pred = aarch64_ptrue_reg (<VPRED>mode); | |
2081 | emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1], | |
2082 | operands[2])); | |
2083 | DONE; | |
2084 | } | |
2085 | ) | |
2086 | ||
2087 | ;; Predicated integer absolute difference. | |
2088 | (define_insn "aarch64_<su>abd<mode>_3" | |
2089 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
2090 | (unspec:SVE_I | |
2091 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2092 | (minus:SVE_I | |
2093 | (USMAX:SVE_I | |
9a8d9b3f | 2094 | (match_operand:SVE_I 2 "register_operand" "%0, w") |
915d28fe RS |
2095 | (match_operand:SVE_I 3 "register_operand" "w, w")) |
2096 | (<max_opp>:SVE_I | |
2097 | (match_dup 2) | |
2098 | (match_dup 3)))] | |
06308276 | 2099 | UNSPEC_PRED_X))] |
915d28fe RS |
2100 | "TARGET_SVE" |
2101 | "@ | |
2102 | <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2103 | movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2104 | [(set_attr "movprfx" "*,yes")] | |
2105 | ) | |
2106 | ||
9730c5cc RS |
2107 | ;; Predicated integer absolute difference, merging with the first input. |
2108 | (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2" | |
2109 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
2110 | (unspec:SVE_I | |
2111 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2112 | (minus:SVE_I | |
2113 | (unspec:SVE_I | |
2114 | [(match_operand 4) | |
2115 | (USMAX:SVE_I | |
2116 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
2117 | (match_operand:SVE_I 3 "register_operand" "w, w"))] | |
2118 | UNSPEC_PRED_X) | |
2119 | (unspec:SVE_I | |
2120 | [(match_operand 5) | |
2121 | (<max_opp>:SVE_I | |
2122 | (match_dup 2) | |
2123 | (match_dup 3))] | |
2124 | UNSPEC_PRED_X)) | |
2125 | (match_dup 2)] | |
2126 | UNSPEC_SEL))] | |
2127 | "TARGET_SVE" | |
2128 | "@ | |
2129 | <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2130 | movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2131 | "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" | |
2132 | { | |
2133 | operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode); | |
2134 | } | |
2135 | [(set_attr "movprfx" "*,yes")] | |
2136 | ) | |
2137 | ||
2138 | ;; Predicated integer absolute difference, merging with an independent value.  The rewrite sets the inner predicates (operands 5 and 6) to CONSTM1_RTX if either is non-constant; otherwise, after reload and when operand 4 is a register other than operand 0, it emits a vcond_mask of operands 2 and 4 under operand 1 into operand 0 and substitutes operand 0 for operands 2 and 4; otherwise it FAILs. | |
2139 | (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any" | |
2140 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
2141 | (unspec:SVE_I | |
2142 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
2143 | (minus:SVE_I | |
2144 | (unspec:SVE_I | |
2145 | [(match_operand 5) | |
2146 | (USMAX:SVE_I | |
2147 | (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") | |
2148 | (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))] | |
2149 | UNSPEC_PRED_X) | |
2150 | (unspec:SVE_I | |
2151 | [(match_operand 6) | |
2152 | (<max_opp>:SVE_I | |
2153 | (match_dup 2) | |
2154 | (match_dup 3))] | |
2155 | UNSPEC_PRED_X)) | |
2156 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
2157 | UNSPEC_SEL))] | |
2158 | "TARGET_SVE | |
2159 | && !rtx_equal_p (operands[2], operands[4]) | |
2160 | && !rtx_equal_p (operands[3], operands[4])" | |
2161 | "@ | |
2162 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2163 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2164 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2165 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2166 | #" | |
2167 | "&& 1" | |
2168 | { | |
2169 | if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6])) | |
2170 | operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode); | |
2171 | else if (reload_completed | |
2172 | && register_operand (operands[4], <MODE>mode) | |
2173 | && !rtx_equal_p (operands[0], operands[4])) | |
2174 | { | |
2175 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
2176 | operands[4], operands[1])); | |
2177 | operands[4] = operands[2] = operands[0]; | |
2178 | } | |
2179 | else | |
2180 | FAIL; | |
2181 | } | |
2182 | [(set_attr "movprfx" "yes")] | |
2183 | ) | |
2184 | ||
915d28fe RS |
2185 | ;; ------------------------------------------------------------------------- |
2186 | ;; ---- [INT] Highpart multiplication | |
2187 | ;; ------------------------------------------------------------------------- | |
2188 | ;; Includes: | |
2189 | ;; - SMULH | |
2190 | ;; - UMULH | |
2191 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2192 | |
11e9443f RS |
2193 | ;; Unpredicated highpart multiplication. |
2194 | (define_expand "<su>mul<mode>3_highpart" | |
2195 | [(set (match_operand:SVE_I 0 "register_operand") | |
2196 | (unspec:SVE_I | |
2197 | [(match_dup 3) | |
2198 | (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") | |
2199 | (match_operand:SVE_I 2 "register_operand")] | |
2200 | MUL_HIGHPART)] | |
06308276 | 2201 | UNSPEC_PRED_X))] |
11e9443f RS |
2202 | "TARGET_SVE" |
2203 | { | |
16de3637 | 2204 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
11e9443f RS |
2205 | } |
2206 | ) | |
2207 | ||
2208 | ;; Predicated highpart multiplication. | |
2209 | (define_insn "*<su>mul<mode>3_highpart" | |
a08acce8 | 2210 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") |
11e9443f | 2211 | (unspec:SVE_I |
a08acce8 RH |
2212 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
2213 | (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w") | |
2214 | (match_operand:SVE_I 3 "register_operand" "w, w")] | |
11e9443f | 2215 | MUL_HIGHPART)] |
06308276 | 2216 | UNSPEC_PRED_X))] |
11e9443f | 2217 | "TARGET_SVE" |
a08acce8 RH |
2218 | "@ |
2219 | <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2220 | movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2221 | [(set_attr "movprfx" "*,yes")] | |
11e9443f RS |
2222 | ) |
2223 | ||
915d28fe RS |
2224 | ;; ------------------------------------------------------------------------- |
2225 | ;; ---- [INT] Division | |
2226 | ;; ------------------------------------------------------------------------- | |
2227 | ;; Includes: | |
2228 | ;; - SDIV | |
2229 | ;; - SDIVR | |
2230 | ;; - UDIV | |
2231 | ;; - UDIVR | |
2232 | ;; ------------------------------------------------------------------------- | |
2233 | ||
2234 | ;; Unpredicated integer division. | |
c38f7319 RS |
2235 | (define_expand "<optab><mode>3" |
2236 | [(set (match_operand:SVE_SDI 0 "register_operand") | |
2237 | (unspec:SVE_SDI | |
2238 | [(match_dup 3) | |
2239 | (SVE_INT_BINARY_SD:SVE_SDI | |
2240 | (match_operand:SVE_SDI 1 "register_operand") | |
2241 | (match_operand:SVE_SDI 2 "register_operand"))] | |
06308276 | 2242 | UNSPEC_PRED_X))] |
c38f7319 RS |
2243 | "TARGET_SVE" |
2244 | { | |
16de3637 | 2245 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
c38f7319 RS |
2246 | } |
2247 | ) | |
2248 | ||
915d28fe | 2249 | ;; Integer division predicated with a PTRUE. |
c38f7319 | 2250 | (define_insn "*<optab><mode>3" |
a08acce8 | 2251 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w") |
c38f7319 | 2252 | (unspec:SVE_SDI |
a08acce8 | 2253 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") |
c38f7319 | 2254 | (SVE_INT_BINARY_SD:SVE_SDI |
a08acce8 | 2255 | (match_operand:SVE_SDI 2 "register_operand" "0, w, w") |
f8c22a8b | 2256 | (match_operand:SVE_SDI 3 "register_operand" "w, 0, w"))] |
06308276 | 2257 | UNSPEC_PRED_X))] |
c38f7319 RS |
2258 | "TARGET_SVE" |
2259 | "@ | |
2260 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
a08acce8 RH |
2261 | <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> |
2262 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2263 | [(set_attr "movprfx" "*,*,yes")] | |
c38f7319 RS |
2264 | ) |
2265 | ||
915d28fe RS |
2266 | ;; Predicated integer division with merging. |
2267 | (define_expand "cond_<optab><mode>" | |
2268 | [(set (match_operand:SVE_SDI 0 "register_operand") | |
2269 | (unspec:SVE_SDI | |
2270 | [(match_operand:<VPRED> 1 "register_operand") | |
2271 | (SVE_INT_BINARY_SD:SVE_SDI | |
2272 | (match_operand:SVE_SDI 2 "register_operand") | |
2273 | (match_operand:SVE_SDI 3 "register_operand")) | |
2274 | (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")] | |
2275 | UNSPEC_SEL))] | |
43cacb12 | 2276 | "TARGET_SVE" |
43cacb12 RS |
2277 | ) |
2278 | ||
915d28fe RS |
2279 | ;; Predicated integer division, merging with the first input. |
2280 | (define_insn "*cond_<optab><mode>_2" | |
2281 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
2282 | (unspec:SVE_SDI | |
2283 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2284 | (SVE_INT_BINARY_SD:SVE_SDI | |
2285 | (match_operand:SVE_SDI 2 "register_operand" "0, w") | |
2286 | (match_operand:SVE_SDI 3 "register_operand" "w, w")) | |
2287 | (match_dup 2)] | |
2288 | UNSPEC_SEL))] | |
43cacb12 | 2289 | "TARGET_SVE" |
915d28fe RS |
2290 | "@ |
2291 | <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2292 | movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2293 | [(set_attr "movprfx" "*,yes")] | |
2294 | ) | |
2295 | ||
2296 | ;; Predicated integer division, merging with the second input. | |
2297 | (define_insn "*cond_<optab><mode>_3" | |
2298 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
2299 | (unspec:SVE_SDI | |
2300 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2301 | (SVE_INT_BINARY_SD:SVE_SDI | |
2302 | (match_operand:SVE_SDI 2 "register_operand" "w, w") | |
2303 | (match_operand:SVE_SDI 3 "register_operand" "0, w")) | |
2304 | (match_dup 3)] | |
2305 | UNSPEC_SEL))] | |
2306 | "TARGET_SVE" | |
2307 | "@ | |
2308 | <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2309 | movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
2310 | [(set_attr "movprfx" "*,yes")] | |
2311 | ) | |
2312 | ||
2313 | ;; Predicated integer division, merging with an independent value. | |
2314 | (define_insn_and_rewrite "*cond_<optab><mode>_any" | |
2315 | [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
2316 | (unspec:SVE_SDI | |
2317 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
2318 | (SVE_INT_BINARY_SD:SVE_SDI | |
2319 | (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w") | |
2320 | (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w")) | |
2321 | (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
2322 | UNSPEC_SEL))] | |
2323 | "TARGET_SVE | |
2324 | && !rtx_equal_p (operands[2], operands[4]) | |
2325 | && !rtx_equal_p (operands[3], operands[4])" | |
2326 | "@ | |
2327 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2328 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2329 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2330 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2331 | #" | |
2332 | "&& reload_completed | |
2333 | && register_operand (operands[4], <MODE>mode) | |
2334 | && !rtx_equal_p (operands[0], operands[4])" | |
2335 | { | |
2336 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
2337 | operands[4], operands[1])); | |
2338 | operands[4] = operands[2] = operands[0]; | |
2339 | } | |
2340 | [(set_attr "movprfx" "yes")] | |
43cacb12 RS |
2341 | ) |
2342 | ||
915d28fe RS |
2343 | ;; ------------------------------------------------------------------------- |
2344 | ;; ---- [INT] Binary logical operations | |
2345 | ;; ------------------------------------------------------------------------- | |
2346 | ;; Includes: | |
2347 | ;; - AND | |
2348 | ;; - EOR | |
2349 | ;; - ORR | |
2350 | ;; ------------------------------------------------------------------------- | |
2351 | ||
2352 | ;; Unpredicated integer binary logical operations. | |
43cacb12 | 2353 | (define_insn "<optab><mode>3" |
5e176a61 | 2354 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w") |
43cacb12 | 2355 | (LOGICAL:SVE_I |
5e176a61 RS |
2356 | (match_operand:SVE_I 1 "register_operand" "%0, w, w") |
2357 | (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] | |
43cacb12 RS |
2358 | "TARGET_SVE" |
2359 | "@ | |
2360 | <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 | |
5e176a61 | 2361 | movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 |
43cacb12 | 2362 | <logical>\t%0.d, %1.d, %2.d" |
5e176a61 | 2363 | [(set_attr "movprfx" "*,yes,*")] |
43cacb12 RS |
2364 | ) |
2365 | ||
915d28fe RS |
2366 | ;; Merging forms are handled through SVE_INT_BINARY. |
2367 | ||
2368 | ;; ------------------------------------------------------------------------- | |
2369 | ;; ---- [INT] Binary logical operations (inverted second input) | |
2370 | ;; ------------------------------------------------------------------------- | |
2371 | ;; Includes: | |
2372 | ;; - BIC | |
2373 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2374 | |
35d6c591 | 2375 | (define_insn_and_rewrite "*bic<mode>3" |
43cacb12 RS |
2376 | [(set (match_operand:SVE_I 0 "register_operand" "=w") |
2377 | (and:SVE_I | |
35d6c591 RS |
2378 | (unspec:SVE_I |
2379 | [(match_operand 3) | |
2380 | (not:SVE_I (match_operand:SVE_I 2 "register_operand" "w"))] | |
06308276 | 2381 | UNSPEC_PRED_X) |
35d6c591 RS |
2382 | (match_operand:SVE_I 1 "register_operand" "w")))] |
2383 | "TARGET_SVE" | |
2384 | "bic\t%0.d, %1.d, %2.d" | |
2385 | "&& !CONSTANT_P (operands[3])" | |
2386 | { | |
2387 | operands[3] = CONSTM1_RTX (<VPRED>mode); | |
2388 | } | |
43cacb12 RS |
2389 | ) |
2390 | ||
1b187f36 RS |
2391 | ;; Predicated integer BIC, merging with the first input. |
2392 | (define_insn "*cond_bic<mode>_2" | |
2393 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
2394 | (unspec:SVE_I | |
2395 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2396 | (and:SVE_I | |
2397 | (not:SVE_I (match_operand:SVE_I 3 "register_operand" "w, w")) | |
2398 | (match_operand:SVE_I 2 "register_operand" "0, w")) | |
2399 | (match_dup 2)] | |
2400 | UNSPEC_SEL))] | |
2401 | "TARGET_SVE" | |
2402 | "@ | |
2403 | bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2404 | movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2405 | [(set_attr "movprfx" "*,yes")] | |
2406 | ) | |
2407 | ||
2408 | ;; Predicated integer BIC, merging with an independent value. | |
2409 | (define_insn_and_rewrite "*cond_bic<mode>_any" | |
2410 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w") | |
2411 | (unspec:SVE_I | |
2412 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
2413 | (and:SVE_I | |
2414 | (not:SVE_I (match_operand:SVE_I 3 "register_operand" "w, w, w, w")) | |
2415 | (match_operand:SVE_I 2 "register_operand" "0, w, w, w")) | |
2416 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] | |
2417 | UNSPEC_SEL))] | |
2418 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
2419 | "@ | |
2420 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2421 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2422 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2423 | #" | |
2424 | "&& reload_completed | |
2425 | && register_operand (operands[4], <MODE>mode) | |
2426 | && !rtx_equal_p (operands[0], operands[4])" | |
2427 | { | |
2428 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
2429 | operands[4], operands[1])); | |
2430 | operands[4] = operands[2] = operands[0]; | |
2431 | } | |
2432 | [(set_attr "movprfx" "yes")] | |
2433 | ) | |
2434 | ||
915d28fe RS |
2435 | ;; ------------------------------------------------------------------------- |
2436 | ;; ---- [INT] Shifts | |
2437 | ;; ------------------------------------------------------------------------- | |
2438 | ;; Includes: | |
2439 | ;; - ASR | |
2440 | ;; - LSL | |
2441 | ;; - LSR | |
2442 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2443 | |
915d28fe RS |
2444 | ;; Unpredicated shift by a scalar, which expands into one of the vector | |
2445 | ;; shifts below.  A constant amount is duplicated into a vector and kept as an immediate when that vector is a valid shift operand; otherwise it is forced into a register.  A non-constant amount is converted to <VEL>mode and duplicated into a fresh register. | |
2446 | (define_expand "<ASHIFT:optab><mode>3" | |
2447 | [(set (match_operand:SVE_I 0 "register_operand") | |
2448 | (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") | |
2449 | (match_operand:<VEL> 2 "general_operand")))] | |
43cacb12 RS |
2450 | "TARGET_SVE" |
2451 | { | |
915d28fe RS |
2452 | rtx amount; |
2453 | if (CONST_INT_P (operands[2])) | |
2454 | { | |
2455 | amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); | |
2456 | if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode)) | |
2457 | amount = force_reg (<MODE>mode, amount); | |
2458 | } | |
2459 | else | |
2460 | { | |
2461 | amount = gen_reg_rtx (<MODE>mode); | |
2462 | emit_insn (gen_vec_duplicate<mode> (amount, | |
2463 | convert_to_mode (<VEL>mode, | |
2464 | operands[2], 0))); | |
2465 | } | |
2466 | emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); | |
2467 | DONE; | |
43cacb12 RS |
2468 | } |
2469 | ) | |
2470 | ||
915d28fe | 2471 | ;; Unpredicated shift by a vector. |
43cacb12 RS |
2472 | (define_expand "v<optab><mode>3" |
2473 | [(set (match_operand:SVE_I 0 "register_operand") | |
2474 | (unspec:SVE_I | |
2475 | [(match_dup 3) | |
2476 | (ASHIFT:SVE_I | |
2477 | (match_operand:SVE_I 1 "register_operand") | |
2478 | (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))] | |
06308276 | 2479 | UNSPEC_PRED_X))] |
43cacb12 RS |
2480 | "TARGET_SVE" |
2481 | { | |
16de3637 | 2482 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
2483 | } |
2484 | ) | |
2485 | ||
915d28fe RS |
2486 | ;; Shift by a vector, predicated with a PTRUE. We don't actually need |
2487 | ;; the predicate for the first alternative, but using Upa or X isn't | |
2488 | ;; likely to gain much and would make the instruction seem less uniform | |
2489 | ;; to the register allocator. | |
26004f51 | 2490 | (define_insn_and_split "*v<optab><mode>3" |
7d1f2401 | 2491 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w") |
43cacb12 | 2492 | (unspec:SVE_I |
7d1f2401 | 2493 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") |
43cacb12 | 2494 | (ASHIFT:SVE_I |
7d1f2401 RS |
2495 | (match_operand:SVE_I 2 "register_operand" "w, 0, w, w") |
2496 | (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))] | |
06308276 | 2497 | UNSPEC_PRED_X))] |
43cacb12 RS |
2498 | "TARGET_SVE" |
2499 | "@ | |
26004f51 | 2500 | # |
a08acce8 | 2501 | <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
7d1f2401 | 2502 | <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype> |
a08acce8 | 2503 | movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" |
26004f51 RS |
2504 | "&& reload_completed |
2505 | && !register_operand (operands[3], <MODE>mode)" | |
2506 | [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] | |
2507 | "" | |
7d1f2401 | 2508 | [(set_attr "movprfx" "*,*,*,yes")] |
43cacb12 RS |
2509 | ) |
2510 | ||
26004f51 RS |
2511 | ;; Unpredicated shift operations by a constant (post-RA only). |
2512 | ;; These are generated by splitting a predicated instruction whose | |
2513 | ;; predicate is unused. | |
2514 | (define_insn "*post_ra_v<optab><mode>3" | |
2515 | [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
2516 | (ASHIFT:SVE_I | |
2517 | (match_operand:SVE_I 1 "register_operand" "w") | |
2518 | (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))] | |
2519 | "TARGET_SVE && reload_completed" | |
2520 | "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" | |
2521 | ) | |
2522 | ||
20103c0e RS |
2523 | ;; Predicated integer shift by an immediate amount, merging with the first input. | |
2524 | (define_insn "*cond_<optab><mode>_2_const" | |
2525 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
2526 | (unspec:SVE_I | |
2527 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2528 | (ASHIFT:SVE_I | |
2529 | (match_operand:SVE_I 2 "register_operand" "0, w") | |
2530 | (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")) | |
2531 | (match_dup 2)] | |
2532 | UNSPEC_SEL))] | |
2533 | "TARGET_SVE" | |
2534 | "@ | |
2535 | <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2536 | movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" | |
2537 | [(set_attr "movprfx" "*,yes")] | |
2538 | ) | |
2539 | ||
2540 | ;; Predicated integer shift by an immediate amount, merging with an independent value. | |
2541 | (define_insn_and_rewrite "*cond_<optab><mode>_any_const" | |
2542 | [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") | |
2543 | (unspec:SVE_I | |
2544 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2545 | (ASHIFT:SVE_I | |
2546 | (match_operand:SVE_I 2 "register_operand" "w, w, w") | |
2547 | (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")) | |
2548 | (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
2549 | UNSPEC_SEL))] | |
2550 | "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" | |
2551 | "@ | |
2552 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2553 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2554 | #" | |
2555 | "&& reload_completed | |
2556 | && register_operand (operands[4], <MODE>mode) | |
2557 | && !rtx_equal_p (operands[0], operands[4])" | |
2558 | { | |
2559 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
2560 | operands[4], operands[1])); | |
2561 | operands[4] = operands[2] = operands[0]; | |
2562 | } | |
2563 | [(set_attr "movprfx" "yes")] | |
2564 | ) | |
2565 | ||
915d28fe RS |
2566 | ;; ------------------------------------------------------------------------- |
2567 | ;; ---- [FP] General binary arithmetic corresponding to rtx codes | |
2568 | ;; ------------------------------------------------------------------------- | |
2569 | ;; Includes post-RA forms of: | |
2570 | ;; - FADD | |
2571 | ;; - FMUL | |
2572 | ;; - FSUB | |
2573 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2574 | |
915d28fe RS |
2575 | ;; Unpredicated floating-point binary operations (post-RA only). |
2576 | ;; These are generated by splitting a predicated instruction whose | |
2577 | ;; predicate is unused. | |
2578 | (define_insn "*post_ra_<sve_fp_op><mode>3" | |
2579 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2580 | (SVE_UNPRED_FP_BINARY:SVE_F | |
2581 | (match_operand:SVE_F 1 "register_operand" "w") | |
2582 | (match_operand:SVE_F 2 "register_operand" "w")))] | |
2583 | "TARGET_SVE && reload_completed" | |
2584 | "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>") | |
2585 | ||
2586 | ;; ------------------------------------------------------------------------- | |
2587 | ;; ---- [FP] General binary arithmetic corresponding to unspecs | |
2588 | ;; ------------------------------------------------------------------------- | |
2589 | ;; Includes merging forms of: | |
a19ba9e1 | 2590 | ;; - FADD (constant forms handled in the "Addition" section) |
915d28fe RS |
2591 | ;; - FDIV |
2592 | ;; - FDIVR | |
a19ba9e1 RS |
2593 | ;; - FMAXNM (including #0.0 and #1.0) |
2594 | ;; - FMINNM (including #0.0 and #1.0) | |
2595 | ;; - FMUL (including #0.5 and #2.0) | |
2596 | ;; - FSUB (constant forms handled in the "Addition" section) | |
2597 | ;; - FSUBR (constant forms handled in the "Subtraction" section) | |
915d28fe RS |
2598 | ;; ------------------------------------------------------------------------- |
2599 | ||
0254ed79 RS |
2600 | ;; Unpredicated floating-point binary operations.  Expanded as the predicated SVE_COND_FP_BINARY unspec with SVE_RELAXED_GP strictness; operand 3 is set to aarch64_ptrue_reg (<VPRED>mode). | |
2601 | (define_expand "<optab><mode>3" | |
2602 | [(set (match_operand:SVE_F 0 "register_operand") | |
2603 | (unspec:SVE_F | |
2604 | [(match_dup 3) | |
2605 | (const_int SVE_RELAXED_GP) | |
2606 | (match_operand:SVE_F 1 "<sve_pred_fp_rhs1_operand>") | |
2607 | (match_operand:SVE_F 2 "<sve_pred_fp_rhs2_operand>")] | |
2608 | SVE_COND_FP_BINARY))] | |
2609 | "TARGET_SVE" | |
2610 | { | |
2611 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
2612 | } | |
2613 | ) | |
2614 | ||
2615 | ;; Predicated floating-point binary operations that have no immediate forms. | |
2616 | (define_insn "*<optab><mode>3" | |
2617 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2618 | (unspec:SVE_F | |
2619 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2620 | (match_operand:SI 4 "aarch64_sve_gp_strictness") | |
2621 | (match_operand:SVE_F 2 "register_operand" "0, w, w") | |
2622 | (match_operand:SVE_F 3 "register_operand" "w, 0, w")] | |
2623 | SVE_COND_FP_BINARY_REG))] | |
2624 | "TARGET_SVE" | |
2625 | "@ | |
2626 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2627 | <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2628 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2629 | [(set_attr "movprfx" "*,*,yes")] | |
2630 | ) | |
2631 | ||
915d28fe RS |
2632 | ;; Predicated floating-point operations with merging. |
2633 | (define_expand "cond_<optab><mode>" | |
2634 | [(set (match_operand:SVE_F 0 "register_operand") | |
2635 | (unspec:SVE_F | |
2636 | [(match_operand:<VPRED> 1 "register_operand") | |
2637 | (unspec:SVE_F | |
6fe679cc | 2638 | [(match_dup 1) |
c9c5a809 | 2639 | (const_int SVE_STRICT_GP) |
a19ba9e1 RS |
2640 | (match_operand:SVE_F 2 "<sve_pred_fp_rhs1_operand>") |
2641 | (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_operand>")] | |
915d28fe RS |
2642 | SVE_COND_FP_BINARY) |
2643 | (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] | |
2644 | UNSPEC_SEL))] | |
43cacb12 | 2645 | "TARGET_SVE" |
43cacb12 RS |
2646 | ) |
2647 | ||
915d28fe | 2648 | ;; Predicated floating-point operations, merging with the first input. |
c9c5a809 | 2649 | (define_insn_and_rewrite "*cond_<optab><mode>_2" |
915d28fe RS |
2650 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
2651 | (unspec:SVE_F | |
57d6f4d0 | 2652 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
915d28fe | 2653 | (unspec:SVE_F |
c9c5a809 RS |
2654 | [(match_operand 4) |
2655 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
6fe679cc | 2656 | (match_operand:SVE_F 2 "register_operand" "0, w") |
915d28fe RS |
2657 | (match_operand:SVE_F 3 "register_operand" "w, w")] |
2658 | SVE_COND_FP_BINARY) | |
2659 | (match_dup 2)] | |
2660 | UNSPEC_SEL))] | |
c9c5a809 | 2661 | "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" |
43cacb12 | 2662 | "@ |
915d28fe RS |
2663 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
2664 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
c9c5a809 RS |
2665 | "&& !rtx_equal_p (operands[1], operands[4])" |
2666 | { | |
2667 | operands[4] = copy_rtx (operands[1]); | |
2668 | } | |
915d28fe | 2669 | [(set_attr "movprfx" "*,yes")] |
43cacb12 RS |
2670 | ) |
2671 | ||
a19ba9e1 RS |
2672 | ;; Same for operations that take a 1-bit constant. |
2673 | (define_insn_and_rewrite "*cond_<optab><mode>_2_const" | |
2674 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?w") | |
2675 | (unspec:SVE_F | |
2676 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2677 | (unspec:SVE_F | |
2678 | [(match_operand 4) | |
2679 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
2680 | (match_operand:SVE_F 2 "register_operand" "0, w") | |
2681 | (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] | |
2682 | SVE_COND_FP_BINARY_I1) | |
2683 | (match_dup 2)] | |
2684 | UNSPEC_SEL))] | |
2685 | "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" | |
2686 | "@ | |
2687 | <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2688 | movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" | |
2689 | "&& !rtx_equal_p (operands[1], operands[4])" | |
2690 | { | |
2691 | operands[4] = copy_rtx (operands[1]); | |
2692 | } | |
2693 | [(set_attr "movprfx" "*,yes")] | |
2694 | ) | |
2695 | ||
915d28fe | 2696 | ;; Predicated floating-point operations, merging with the second input. |
c9c5a809 | 2697 | (define_insn_and_rewrite "*cond_<optab><mode>_3" |
915d28fe RS |
2698 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
2699 | (unspec:SVE_F | |
57d6f4d0 | 2700 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
915d28fe | 2701 | (unspec:SVE_F |
c9c5a809 RS |
2702 | [(match_operand 4) |
2703 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
6fe679cc | 2704 | (match_operand:SVE_F 2 "register_operand" "w, w") |
915d28fe RS |
2705 | (match_operand:SVE_F 3 "register_operand" "0, w")] |
2706 | SVE_COND_FP_BINARY) | |
2707 | (match_dup 3)] | |
2708 | UNSPEC_SEL))] | |
c9c5a809 | 2709 | "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" |
43cacb12 | 2710 | "@ |
915d28fe RS |
2711 | <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> |
2712 | movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
c9c5a809 RS |
2713 | "&& !rtx_equal_p (operands[1], operands[4])" |
2714 | { | |
2715 | operands[4] = copy_rtx (operands[1]); | |
2716 | } | |
915d28fe | 2717 | [(set_attr "movprfx" "*,yes")] |
cee99fa0 RS |
2718 | ) |
2719 | ||
915d28fe RS |
2720 | ;; Predicated floating-point operations, merging with an independent value. |
2721 | (define_insn_and_rewrite "*cond_<optab><mode>_any" | |
2722 | [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") | |
2723 | (unspec:SVE_F | |
2724 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
2725 | (unspec:SVE_F | |
c9c5a809 RS |
2726 | [(match_operand 5) |
2727 | (match_operand:SI 6 "aarch64_sve_gp_strictness") | |
6fe679cc | 2728 | (match_operand:SVE_F 2 "register_operand" "0, w, w, w, w") |
915d28fe RS |
2729 | (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")] |
2730 | SVE_COND_FP_BINARY) | |
2731 | (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] | |
2732 | UNSPEC_SEL))] | |
2733 | "TARGET_SVE | |
2734 | && !rtx_equal_p (operands[2], operands[4]) | |
c9c5a809 RS |
2735 | && !rtx_equal_p (operands[3], operands[4]) |
2736 | && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" | |
cee99fa0 | 2737 | "@ |
915d28fe RS |
2738 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> |
2739 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2740 | movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2741 | movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2742 | #" | |
c9c5a809 | 2743 | "&& 1" |
915d28fe | 2744 | { |
c9c5a809 RS |
2745 | if (reload_completed |
2746 | && register_operand (operands[4], <MODE>mode) | |
2747 | && !rtx_equal_p (operands[0], operands[4])) | |
2748 | { | |
2749 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], | |
2750 | operands[4], operands[1])); | |
2751 | operands[4] = operands[2] = operands[0]; | |
2752 | } | |
2753 | else if (!rtx_equal_p (operands[1], operands[5])) | |
2754 | operands[5] = copy_rtx (operands[1]); | |
2755 | else | |
2756 | FAIL; | |
915d28fe RS |
2757 | } |
2758 | [(set_attr "movprfx" "yes")] | |
cee99fa0 RS |
2759 | ) |
2760 | ||
;; Predicated floating-point operation whose second input is a 1-bit
;; constant (SVE_COND_FP_BINARY_I1), merging with an independent value
;; (operand 4).
;;
;; Alternatives, keyed on the fallback value in operand 4:
;;   Dz - zero: zeroing MOVPRFX from operand 2, then the operation.
;;   0  - already in the destination: merging MOVPRFX, then the operation.
;;   w  - any other register: emitted as "#" and rewritten below.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "w, w, w")
              (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #"
  "&& 1"
  {
    /* After reload, a register fallback that is not already the
       destination has to be blended into the destination first.  */
    bool select_first = (reload_completed
                         && register_operand (operands[4], <MODE>mode)
                         && !rtx_equal_p (operands[0], operands[4]));
    if (select_first)
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        /* Both the first input and the fallback now live in operand 0.  */
        operands[2] = operands[0];
        operands[4] = operands[0];
      }
    else if (rtx_equal_p (operands[1], operands[5]))
      /* Inner predicate is already operand 1: nothing to rewrite.  */
      FAIL;
    else
      /* Make the inner predicate a copy of the governing predicate.  */
      operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "yes")]
)
2798 | ||
915d28fe RS |
2799 | ;; ------------------------------------------------------------------------- |
2800 | ;; ---- [FP] Addition | |
2801 | ;; ------------------------------------------------------------------------- | |
2802 | ;; Includes: | |
2803 | ;; - FADD | |
2804 | ;; - FSUB | |
2805 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 2806 | |
;; Predicated floating-point addition.
;;
;; Alternatives:
;;   0, 1 - immediate add (vsA) or negated-immediate subtract (vsN),
;;          with operand 2 tied to the destination.
;;   2    - register + register with strictness constraint "Z"; output
;;          as "#" and split after reload into an unpredicated PLUS.
;;   3, 4 - as 0 and 1, but with operand 2 in a different register,
;;          copied into the destination by MOVPRFX first.
(define_insn_and_split "*add<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, i, i")
           (match_operand:SVE_F 2 "register_operand" "%0, 0, w, w, w")
           (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, vsA, vsN")]
          UNSPEC_COND_FADD))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
  ; The register/register form is split once reload has finished so
  ; that the final code does not carry a PTRUE it has no use for.
  "&& reload_completed && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,yes,yes")]
)
2831 | ||
;; Predicated floating-point addition of a constant in which inactive
;; lanes keep the value of the first input (the UNSPEC_SEL fallback is
;; a match_dup of operand 2).
;;
;; Alternatives 0 and 1 have operand 2 tied to the destination;
;; alternatives 2 and 3 copy it there with MOVPRFX first.  In each pair
;; the first uses FADD with a vsA immediate and the second uses FSUB
;; with the negated vsN immediate.
(define_insn_and_rewrite "*cond_add<mode>_2_const"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?w, ?w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (unspec:SVE_F
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "0, 0, w, w")
              (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
             UNSPEC_COND_FADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
  ; Rewrite only while the inner predicate differs from operand 1.
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    /* Use the governing predicate for the inner operation as well.  */
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,*,yes,yes")]
)
2858 | ||
;; Predicated floating-point addition of a constant in which inactive
;; lanes take an independent value (operand 4).
;;
;; The alternatives come in FADD (vsA) / FSUB-of-negated (vsN) pairs:
;;   0, 1 - fallback is zero: zeroing MOVPRFX from operand 2.
;;   2, 3 - fallback is tied to the destination: merging MOVPRFX.
;;   4, 5 - fallback is another register: "#", rewritten below.
(define_insn_and_rewrite "*cond_add<mode>_any_const"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "w, w, w, w, w, w")
              (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
             UNSPEC_COND_FADD)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   #"
  "&& 1"
  {
    /* After reload, a register fallback that is not already the
       destination has to be blended into the destination first.  */
    bool select_first = (reload_completed
                         && register_operand (operands[4], <MODE>mode)
                         && !rtx_equal_p (operands[0], operands[4]));
    if (select_first)
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        /* Both the first input and the fallback now live in operand 0.  */
        operands[2] = operands[0];
        operands[4] = operands[0];
      }
    else if (rtx_equal_p (operands[1], operands[5]))
      /* Inner predicate is already operand 1: nothing to rewrite.  */
      FAIL;
    else
      /* Make the inner predicate a copy of the governing predicate.  */
      operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "yes")]
)
2900 | ||
2901 | ;; Register merging forms are handled through SVE_COND_FP_BINARY. | |
cee99fa0 | 2902 | |
915d28fe RS |
2903 | ;; ------------------------------------------------------------------------- |
2904 | ;; ---- [FP] Subtraction | |
2905 | ;; ------------------------------------------------------------------------- | |
2906 | ;; Includes: | |
915d28fe RS |
2907 | ;; - FSUB |
2908 | ;; - FSUBR | |
2909 | ;; ------------------------------------------------------------------------- | |
cee99fa0 | 2910 | |
;; Predicated floating-point subtraction.
;;
;; Alternatives:
;;   0 - immediate minuend (vsA) with the subtrahend (operand 3) tied to
;;       the destination: a single FSUBR.
;;   1 - register - register with strictness constraint "Z"; output as
;;       "#" and split after reload into an unpredicated MINUS.
;;   2 - as 0, but with operand 3 in a register other than the
;;       destination; MOVPRFX copies it across before the FSUBR.
;;
;; Operand 3 of alternative 2 uses "w" rather than "0".  The destination
;; of that alternative is earlyclobber, and tying operand 3 to it would
;; turn the MOVPRFX into a copy of a register onto itself, so the
;; alternative would never cover the case it exists for.
(define_insn_and_split "*sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, i")
           (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "vsA, w, vsA")
           (match_operand:SVE_F 3 "register_operand" "0, w, w")]
          UNSPEC_COND_FSUB))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
2933 | ||
;; Predicated floating-point subtraction from a constant in which
;; inactive lanes keep the value of the second input (the UNSPEC_SEL
;; fallback is a match_dup of operand 3).
;;
;; Alternative 0 has operand 3 tied to the destination; alternative 1
;; copies it there with MOVPRFX before the FSUBR.
(define_insn_and_rewrite "*cond_sub<mode>_3_const"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_F 3 "register_operand" "0, w")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
  ; Rewrite only while the inner predicate differs from operand 1.
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    /* Use the governing predicate for the inner operation as well.  */
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
2958 | ||
;; Predicated floating-point subtraction from a constant in which
;; inactive lanes take an independent value (operand 4).
;;
;; Alternatives, keyed on the fallback value in operand 4:
;;   Dz - zero: zeroing MOVPRFX from operand 3, then FSUBR.
;;   0  - already in the destination: merging MOVPRFX, then FSUBR.
;;   w  - any other register: emitted as "#" and rewritten below.
(define_insn_and_rewrite "*cond_sub<mode>_any_const"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_F 3 "register_operand" "w, w, w")]
             UNSPEC_COND_FSUB)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[3], operands[4])
   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #"
  "&& 1"
  {
    /* After reload, a register fallback that is not already the
       destination has to be blended into the destination first.  */
    bool select_first = (reload_completed
                         && register_operand (operands[4], <MODE>mode)
                         && !rtx_equal_p (operands[0], operands[4]));
    if (select_first)
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                                 operands[4], operands[1]));
        /* Both the register input and the fallback now live in
           operand 0.  */
        operands[3] = operands[0];
        operands[4] = operands[0];
      }
    else if (rtx_equal_p (operands[1], operands[5]))
      /* Inner predicate is already operand 1: nothing to rewrite.  */
      FAIL;
    else
      /* Make the inner predicate a copy of the governing predicate.  */
      operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "yes")]
)
2997 | ||
2998 | ;; Register merging forms are handled through SVE_COND_FP_BINARY. | |
43cacb12 | 2999 | |
915d28fe RS |
3000 | ;; ------------------------------------------------------------------------- |
3001 | ;; ---- [FP] Absolute difference | |
3002 | ;; ------------------------------------------------------------------------- | |
3003 | ;; Includes: | |
3004 | ;; - FABD | |
3005 | ;; ------------------------------------------------------------------------- | |
3006 | ||
;; Predicated floating-point absolute difference: an UNSPEC_COND_FABS
;; applied to an UNSPEC_COND_FSUB, emitted as a single FABD.
;;
;; Alternative 0 has operand 2 tied to the destination; alternative 1
;; copies it there with MOVPRFX first.
(define_insn_and_rewrite "*fabd<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_F 2 "register_operand" "%0, w")
              (match_operand:SVE_F 3 "register_operand" "w, w")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Rewrite only while the subtraction's predicate differs from operand 1.
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    /* Use the governing predicate for the subtraction as well.  */
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
3030 | ||
;; Predicated floating-point absolute difference in which inactive
;; lanes keep the value of the first input (the UNSPEC_SEL fallback is
;; a match_dup of operand 2).
;;
;; Alternative 0 has operand 2 tied to the destination; alternative 1
;; copies it there with MOVPRFX first.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (unspec:SVE_F
                [(match_operand 6)
                 (match_operand:SI 7 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_F 2 "register_operand" "0, w")
                 (match_operand:SVE_F 3 "register_operand" "w, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE
   && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
   && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Rewrite while either inner predicate still differs from operand 1.
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[6]))"
  {
    /* Both the FABS and the FSUB predicate become copies of the
       governing predicate.  */
    operands[4] = copy_rtx (operands[1]);
    operands[6] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
3063 | ||
;; Predicated floating-point absolute difference in which inactive
;; lanes keep the value of the second input (the UNSPEC_SEL fallback is
;; a match_dup of operand 3).
;;
;; Alternative 0 has operand 3 tied to the destination; alternative 1
;; copies it there with MOVPRFX first.  The FABD then takes operand 2
;; as its remaining source.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_F
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (unspec:SVE_F
                [(match_operand 6)
                 (match_operand:SI 7 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_F 2 "register_operand" "w, w")
                 (match_operand:SVE_F 3 "register_operand" "0, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE
   && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
   && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  ; Rewrite while either inner predicate still differs from operand 1.
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[6]))"
  {
    /* Both the FABS and the FSUB predicate become copies of the
       governing predicate.  */
    operands[4] = copy_rtx (operands[1]);
    operands[6] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
3096 | ||
;; Predicated floating-point absolute difference in which inactive
;; lanes take an independent value (operand 4).
;;
;; Alternatives:
;;   0 - zero fallback, operand 2 tied to the destination.
;;   1 - zero fallback, operand 3 tied to the destination.
;;   2 - zero fallback, neither input tied: zeroing MOVPRFX from
;;       operand 2.
;;   3 - fallback tied to the destination: merging MOVPRFX from
;;       operand 2.
;;   4 - fallback in another register: "#", rewritten below.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
  [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (unspec:SVE_F
                [(match_operand 7)
                 (match_operand:SI 8 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
                 (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])
   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
   && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    /* After reload, a register fallback that is not already the
       destination has to be blended into the destination first.  */
    bool select_first = (reload_completed
                         && register_operand (operands[4], <MODE>mode)
                         && !rtx_equal_p (operands[0], operands[4]));
    /* True once both inner predicates already equal operand 1.  */
    bool preds_match = (rtx_equal_p (operands[1], operands[5])
                        && rtx_equal_p (operands[1], operands[7]));
    if (select_first)
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                                 operands[4], operands[1]));
        /* Operand 3 and the fallback now both live in operand 0.  */
        operands[3] = operands[0];
        operands[4] = operands[0];
      }
    else if (preds_match)
      FAIL;
    else
      {
        operands[5] = copy_rtx (operands[1]);
        operands[7] = copy_rtx (operands[1]);
      }
  }
  [(set_attr "movprfx" "yes")]
)
3147 | ||
915d28fe RS |
3148 | ;; ------------------------------------------------------------------------- |
3149 | ;; ---- [FP] Multiplication | |
3150 | ;; ------------------------------------------------------------------------- | |
3151 | ;; Includes: | |
3152 | ;; - FMUL | |
3153 | ;; ------------------------------------------------------------------------- | |
3154 | ||
;; Predicated floating-point multiplication.
;;
;; Alternatives:
;;   0 - multiply by an immediate (vsM) with operand 2 tied to the
;;       destination: a single FMUL.
;;   1 - register * register with strictness constraint "Z"; output as
;;       "#" and split after reload into an unpredicated MULT.
;;   2 - as 0, but with operand 2 in a register other than the
;;       destination; MOVPRFX copies it across before the FMUL.
;;
;; Operand 2 of alternative 2 uses "w" rather than "0".  The destination
;; of that alternative is earlyclobber, and tying operand 2 to it would
;; turn the MOVPRFX into a copy of a register onto itself, so the
;; alternative would never cover the case it exists for.
(define_insn_and_split "*mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, i")
           (match_operand:SVE_F 2 "register_operand" "%0, w, w")
           (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w, vsM")]
          UNSPEC_COND_FMUL))]
  "TARGET_SVE"
  "@
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #
   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
3177 | ||
a19ba9e1 RS |
3178 | ;; Merging forms are handled through SVE_COND_FP_BINARY and |
3179 | ;; SVE_COND_FP_BINARY_I1. | |
915d28fe | 3180 | |
915d28fe RS |
3181 | ;; ------------------------------------------------------------------------- |
3182 | ;; ---- [FP] Binary logical operations | |
3183 | ;; ------------------------------------------------------------------------- | |
3184 | ;; Includes: | |
3185 | ;; - AND | |
3186 | ;; - EOR | |
3187 | ;; - ORR | |
3188 | ;; ------------------------------------------------------------------------- | |
3189 | ||
;; Bitwise logical operations (LOGICALF) on floating-point vector modes.
;; Having this pattern avoids subregs; it is expressed as an UNSPEC
;; because the rtx logical codes are not defined for floating-point
;; modes.  The instruction is always emitted on .d elements.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:SVE_F 1 "register_operand" "w")
           (match_operand:SVE_F 2 "register_operand" "w")]
          LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)
3201 | ||
915d28fe RS |
3202 | ;; ------------------------------------------------------------------------- |
3203 | ;; ---- [FP] Sign copying | |
3204 | ;; ------------------------------------------------------------------------- | |
3205 | ;; The patterns in this section are synthetic. | |
3206 | ;; ------------------------------------------------------------------------- | |
3207 | ||
;; copysign: operand 0 = magnitude of operand 1 with the sign of
;; operand 2.  Done entirely with integer AND/IOR on the
;; <V_INT_EQUIV> view of the inputs.
(define_expand "copysign<mode>3"
  [(match_operand:SVE_F 0 "register_operand")
   (match_operand:SVE_F 1 "register_operand")
   (match_operand:SVE_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign_part = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx value_part = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx combined = gen_reg_rtx (<V_INT_EQUIV>mode);

    /* Index of the top bit of each element; the mask below covers
       that bit and everything above it.  */
    int top_bit = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
    unsigned HOST_WIDE_INT sign_mask = HOST_WIDE_INT_M1U << top_bit;

    rtx value_src = lowpart_subreg (<V_INT_EQUIV>mode, operands[1],
                                    <MODE>mode);
    rtx sign_src = lowpart_subreg (<V_INT_EQUIV>mode, operands[2],
                                   <MODE>mode);

    /* sign_part = operand 2 & sign_mask.  */
    rtx sign_vec
      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, sign_mask);
    emit_insn (gen_and<v_int_equiv>3 (sign_part, sign_src, sign_vec));

    /* value_part = operand 1 & ~sign_mask.  */
    rtx value_vec
      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, ~sign_mask);
    emit_insn (gen_and<v_int_equiv>3 (value_part, value_src, value_vec));

    emit_insn (gen_ior<v_int_equiv>3 (combined, sign_part, value_part));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
    DONE;
  }
)
3237 | ||
;; xorsign: operand 0 = operand 1 with its sign bit XORed with the
;; sign bit of operand 2.  Done with integer AND/XOR on the
;; <V_INT_EQUIV> view of the inputs.
(define_expand "xorsign<mode>3"
  [(match_operand:SVE_F 0 "register_operand")
   (match_operand:SVE_F 1 "register_operand")
   (match_operand:SVE_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign_part = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx combined = gen_reg_rtx (<V_INT_EQUIV>mode);

    /* Index of the top bit of each element; the mask below covers
       that bit and everything above it.  */
    int top_bit = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
    unsigned HOST_WIDE_INT sign_mask = HOST_WIDE_INT_M1U << top_bit;

    rtx value_src = lowpart_subreg (<V_INT_EQUIV>mode, operands[1],
                                    <MODE>mode);
    rtx sign_src = lowpart_subreg (<V_INT_EQUIV>mode, operands[2],
                                   <MODE>mode);

    /* sign_part = operand 2 & sign_mask.  */
    rtx sign_vec
      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, sign_mask);
    emit_insn (gen_and<v_int_equiv>3 (sign_part, sign_src, sign_vec));

    emit_insn (gen_xor<v_int_equiv>3 (combined, value_src, sign_part));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
    DONE;
  }
)
3261 | ||
915d28fe RS |
3262 | ;; ------------------------------------------------------------------------- |
3263 | ;; ---- [FP] Maximum and minimum | |
3264 | ;; ------------------------------------------------------------------------- | |
3265 | ;; Includes: | |
915d28fe | 3266 | ;; - FMAXNM |
915d28fe RS |
3267 | ;; - FMINNM |
3268 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 3269 | |
;; Unpredicated fmax/fmin, i.e. the libm functions.  (The optabs for the
;; smin/smax rtx codes are handled in the generic section earlier in the
;; file.)  The expander supplies an all-true predicate as operand 3 and
;; marks the operation SVE_RELAXED_GP.
(define_expand "<maxmin_uns><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
        (unspec:SVE_F
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_F 1 "register_operand")
           (match_operand:SVE_F 2 "aarch64_sve_float_maxmin_operand")]
          SVE_COND_FP_MAXMIN_PUBLIC))]
  "TARGET_SVE"
  {
    /* Governing predicate for the predicated instruction.  */
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3285 | ||
;; Predicated floating-point maximum/minimum.
;;
;; Alternatives:
;;   0 - immediate second input (vsB), operand 2 tied to the destination.
;;   1 - register second input, operand 2 tied to the destination.
;;   2 - as 0, with operand 2 copied into the destination by MOVPRFX.
;;   3 - as 1, with operand 2 copied into the destination by MOVPRFX.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_F 2 "register_operand" "%0, 0, w, w")
           (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
          SVE_COND_FP_MAXMIN_PUBLIC))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes,yes")]
)
3303 | ||
a19ba9e1 RS |
3304 | ;; Merging forms are handled through SVE_COND_FP_BINARY and |
3305 | ;; SVE_COND_FP_BINARY_I1. | |
915d28fe RS |
3306 | |
3307 | ;; ------------------------------------------------------------------------- | |
3308 | ;; ---- [PRED] Binary logical operations | |
3309 | ;; ------------------------------------------------------------------------- | |
3310 | ;; Includes: | |
3311 | ;; - AND | |
3312 | ;; - ANDS | |
3313 | ;; - EOR | |
3314 | ;; - EORS | |
3315 | ;; - ORR | |
3316 | ;; - ORRS | |
3317 | ;; ------------------------------------------------------------------------- | |
3318 | ||
;; Predicate AND.  No separate governing predicate is needed: operand 1
;; serves both as the GP (%1/z) and as the first source.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (match_operand:PRED_ALL 1 "register_operand" "Upa")
          (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %1.b, %2.b"
)
9d4ac06e | 3327 | |
;; Unpredicated predicate EOR and ORR.  The expander wraps the operation
;; in an AND with an all-true predicate (operand 3) so that it takes the
;; predicated form.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (LOGICAL_OR:PRED_ALL
            (match_operand:PRED_ALL 1 "register_operand")
            (match_operand:PRED_ALL 2 "register_operand"))
          (match_dup 3)))]
  "TARGET_SVE"
  {
    /* Governing predicate for the predicated instruction.  */
    operands[3] = aarch64_ptrue_reg (<MODE>mode);
  }
)
6c4fd4a9 | 3341 | |
;; Predicated predicate AND, EOR and ORR: the LOGICAL operation on
;; operands 2 and 3, ANDed with the governing predicate in operand 1
;; (zeroing form, %1/z).
(define_insn "@aarch64_pred_<optab><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL
            (match_operand:PRED_ALL 2 "register_operand" "Upa")
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)
3353 | ||
;; Flag-setting predicate logical operation.  Operands 2 and 3 are
;; combined under the GP in operand 1; the result is stored in operand 0
;; and the condition flags are set in the same way as for PTEST.
;; Operand 4 is the predicate ANDed with the result in the RTL, and
;; operand 5 is the ptrue flag for the UNSPEC_PTEST.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (LOGICAL:PRED_ALL
               (match_operand:PRED_ALL 2 "register_operand" "Upa")
               (match_operand:PRED_ALL 3 "register_operand" "Upa"))
             (match_dup 4))]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
          (match_dup 4)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)
3375 | ||
3376 | ;; ------------------------------------------------------------------------- | |
3377 | ;; ---- [PRED] Binary logical operations (inverted second input) | |
3378 | ;; ------------------------------------------------------------------------- | |
3379 | ;; Includes: | |
3380 | ;; - BIC | |
3381 | ;; - ORN | |
3382 | ;; ------------------------------------------------------------------------- | |
3383 | ||
;; Predicated predicate BIC and ORN: operand 2 combined with the
;; inverse of operand 3, under the governing predicate in operand 1.
;; The inverted input is listed first in the RTL but is the final
;; source operand of the instruction.
(define_insn "*<nlogical><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL
              (match_operand:PRED_ALL 3 "register_operand" "Upa"))
            (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
)
3395 | ||
3396 | ;; ------------------------------------------------------------------------- | |
3397 | ;; ---- [PRED] Binary logical operations (inverted result) | |
3398 | ;; ------------------------------------------------------------------------- | |
3399 | ;; Includes: | |
3400 | ;; - NAND | |
3401 | ;; - NOR | |
3402 | ;; ------------------------------------------------------------------------- | |
3403 | ||
;; Predicated predicate NAND and NOR, written in the RTL as the
;; NLOGICAL operation applied to the inverses of operands 2 and 3 and
;; then ANDed with the governing predicate in operand 1.
(define_insn "*<logical_nn><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL
              (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (not:PRED_ALL
              (match_operand:PRED_ALL 3 "register_operand" "Upa")))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)
3415 | ||
3416 | ;; ========================================================================= | |
3417 | ;; == Ternary arithmetic | |
3418 | ;; ========================================================================= | |
3419 | ||
3420 | ;; ------------------------------------------------------------------------- | |
3421 | ;; ---- [INT] MLA and MAD | |
3422 | ;; ------------------------------------------------------------------------- | |
3423 | ;; Includes: | |
3424 | ;; - MAD | |
3425 | ;; - MLA | |
3426 | ;; ------------------------------------------------------------------------- | |
3427 | ||
;; Unpredicated integer addition of product:
;; operand 0 = operand 1 * operand 2 + operand 3.
;; aarch64_prepare_sve_int_fma gets the first chance to handle the
;; expansion; if it declines, the multiplication is wrapped in
;; UNSPEC_PRED_X with an all-true predicate (operand 4).
(define_expand "fma<mode>4"
  [(set (match_operand:SVE_I 0 "register_operand")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_dup 4)
             (mult:SVE_I
               (match_operand:SVE_I 1 "register_operand")
               (match_operand:SVE_I 2 "nonmemory_operand"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_I 3 "register_operand")))]
  "TARGET_SVE"
  {
    /* A true return means the helper has already done the expansion.  */
    if (aarch64_prepare_sve_int_fma (operands, PLUS))
      DONE;

    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3445 | ||
;; Predicated integer addition of product:
;; operand 0 = operand 2 * operand 3 + operand 4.
;;
;; Alternatives:
;;   0 - a multiplication input is tied to the destination: MAD.
;;   1 - the addend is tied to the destination: MLA.
;;   2 - nothing tied: MOVPRFX the addend into the destination, then MLA.
(define_insn "*fma<mode>4"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "%0, w, w")
               (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
3463 | ||
b6c3aea1 RS |
3464 | ;; Predicated integer addition of product with merging. |
;; Expander for the conditional form.  Under predicate operand 1 the result
;; is operand 2 * operand 3 + operand 4; operand 5 is the fallback value of
;; the UNSPEC_SEL and may be a register or zero.
;; aarch64_prepare_sve_cond_int_fma may emit the complete expansion itself,
;; in which case we are DONE.  Otherwise, if the fallback equals the second
;; multiplication input, the two inputs are swapped so that the fallback is
;; operand 2, the shape that "*cond_fma<mode>_2" matches via (match_dup 2).
3465 | (define_expand "cond_fma<mode>" | |
3466 | [(set (match_operand:SVE_I 0 "register_operand") | |
3467 | (unspec:SVE_I | |
3468 | [(match_operand:<VPRED> 1 "register_operand") | |
3469 | (plus:SVE_I | |
3470 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand") | |
3471 | (match_operand:SVE_I 3 "general_operand")) | |
3472 | (match_operand:SVE_I 4 "register_operand")) | |
3473 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] | |
3474 | UNSPEC_SEL))] | |
3475 | "TARGET_SVE" | |
3476 | { | |
3477 | if (aarch64_prepare_sve_cond_int_fma (operands, PLUS)) | |
3478 | DONE; | |
3479 | /* Swap the multiplication operands if the fallback value is the | |
3480 | second of the two. */ | |
3481 | if (rtx_equal_p (operands[3], operands[5])) | |
3482 | std::swap (operands[2], operands[3]); | |
3483 | } | |
3484 | ) | |
3485 | ||
3486 | ;; Predicated integer addition of product, merging with the first input. | |
;; The fallback of the UNSPEC_SEL is (match_dup 2), i.e. the first
;; multiplication input.  Alternative 1 ties the output to operand 2 and
;; uses MAD directly; alternative 2 (earlyclobber output) first copies
;; operand 2 into the output with MOVPRFX and then uses MAD.
3487 | (define_insn "*cond_fma<mode>_2" | |
3488 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
3489 | (unspec:SVE_I | |
3490 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
3491 | (plus:SVE_I | |
3492 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "0, w") | |
3493 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
3494 | (match_operand:SVE_I 4 "register_operand" "w, w")) | |
3495 | (match_dup 2)] | |
3496 | UNSPEC_SEL))] | |
3497 | "TARGET_SVE" | |
3498 | "@ | |
3499 | mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3500 | movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
3501 | [(set_attr "movprfx" "*,yes")] | |
3502 | ) | |
3503 | ||
3504 | ;; Predicated integer addition of product, merging with the third input. | |
;; The fallback of the UNSPEC_SEL is (match_dup 4), i.e. the addend.
;; Alternative 1 ties the output to operand 4 and uses MLA directly;
;; alternative 2 (earlyclobber output) first copies operand 4 into the
;; output with MOVPRFX and then uses MLA.
3505 | (define_insn "*cond_fma<mode>_4" | |
3506 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
3507 | (unspec:SVE_I | |
3508 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
3509 | (plus:SVE_I | |
3510 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w") | |
3511 | (match_operand:SVE_I 3 "register_operand" "w, w")) | |
3512 | (match_operand:SVE_I 4 "register_operand" "0, w")) | |
3513 | (match_dup 4)] | |
3514 | UNSPEC_SEL))] | |
3515 | "TARGET_SVE" | |
3516 | "@ | |
3517 | mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3518 | movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
3519 | [(set_attr "movprfx" "*,yes")] | |
3520 | ) | |
3521 | ||
3522 | ;; Predicated integer addition of product, merging with an independent value. | |
;; Operand 5 is a fallback that the insn condition requires to differ from
;; operands 2, 3 and 4.  The output is earlyclobber in every alternative:
;;   1-4. fallback is Dz (zero, going by the reg_or_zero predicate): a
;;        zeroing MOVPRFX sets up the output from operand 4 (alt 1) or from
;;        whichever input is tied to it (alts 2-4), followed by MLA or MAD.
;;   5.   fallback tied to the output: merging MOVPRFX from operand 4,
;;        then MLA.
;;   6.   fallback in an unrelated register: emitted as "#" and rewritten.
;; The rewrite runs after reload when operand 5 is a register other than
;; operand 0.  It emits a vcond_mask that selects between operand 4 and
;; operand 5 under predicate operand 1 into operand 0, then makes both
;; operand 4 and operand 5 refer to operand 0.
3523 | (define_insn_and_rewrite "*cond_fma<mode>_any" | |
3524 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") | |
3525 | (unspec:SVE_I | |
3526 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") | |
3527 | (plus:SVE_I | |
3528 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") | |
3529 | (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")) | |
3530 | (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")) | |
3531 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] | |
3532 | UNSPEC_SEL))] | |
3533 | "TARGET_SVE | |
3534 | && !rtx_equal_p (operands[2], operands[5]) | |
3535 | && !rtx_equal_p (operands[3], operands[5]) | |
3536 | && !rtx_equal_p (operands[4], operands[5])" | |
3537 | "@ | |
3538 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3539 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3540 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3541 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> | |
3542 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3543 | #" | |
3544 | "&& reload_completed | |
3545 | && register_operand (operands[5], <MODE>mode) | |
3546 | && !rtx_equal_p (operands[0], operands[5])" | |
3547 | { | |
3548 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
3549 | operands[5], operands[1])); | |
3550 | operands[5] = operands[4] = operands[0]; | |
3551 | } | |
3552 | [(set_attr "movprfx" "yes")] | |
3553 | ) | |
3554 | ||
915d28fe RS |
3555 | ;; ------------------------------------------------------------------------- |
3556 | ;; ---- [INT] MLS and MSB | |
3557 | ;; ------------------------------------------------------------------------- | |
3558 | ;; Includes: | |
3559 | ;; - MLS | |
3560 | ;; - MSB | |
3561 | ;; ------------------------------------------------------------------------- | |
3562 | ||
b6c3aea1 RS |
3563 | ;; Unpredicated integer subtraction of product. |
;; Expander for the "fnma<mode>4" optab on SVE_I modes:
;;   operand 0 = operand 3 - operand 1 * operand 2.
;; aarch64_prepare_sve_int_fma (called with MINUS) gets the first chance to
;; emit the whole sequence; if it returns true the expansion is DONE.
;; Otherwise operand 4, the governing predicate of the UNSPEC_PRED_X
;; multiplication, is set to the register returned by aarch64_ptrue_reg
;; (presumably an all-true predicate).
3564 | (define_expand "fnma<mode>4" | |
3565 | [(set (match_operand:SVE_I 0 "register_operand") | |
3566 | (minus:SVE_I | |
3567 | (match_operand:SVE_I 3 "register_operand") | |
3568 | (unspec:SVE_I | |
3569 | [(match_dup 4) | |
3570 | (mult:SVE_I (match_operand:SVE_I 1 "register_operand") | |
3571 | (match_operand:SVE_I 2 "general_operand"))] | |
3572 | UNSPEC_PRED_X)))] | |
3573 | "TARGET_SVE" | |
3574 | { | |
3575 | if (aarch64_prepare_sve_int_fma (operands, MINUS)) | |
3576 | DONE; | |
3577 | operands[4] = aarch64_ptrue_reg (<VPRED>mode); | |
3578 | } | |
3579 | ) | |
3580 | ||
915d28fe | 3581 | ;; Predicated integer subtraction of product. |
;; Operand 1 is the governing predicate, operands 2 and 3 are the
;; multiplication inputs ("%" marks them as commutative) and operand 4 is
;; the minuend.  The three alternatives are:
;;   1. output tied to a multiplication input  -> MSB
;;   2. output tied to the minuend             -> MLS
;;   3. output untied (earlyclobber)           -> MOVPRFX from the minuend,
;;                                                then MLS
;; The pattern name carries the "4" suffix to agree with the "fnma<mode>4"
;; expander and with the sibling "*fma<mode>4" insn.  The leading "*" means
;; no generator function is created for it.
b6c3aea1 | 3582 | (define_insn "*fnma<mode>4" |
915d28fe RS |
3583 | [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") |
3584 | (minus:SVE_I | |
3585 | (match_operand:SVE_I 4 "register_operand" "w, 0, w") | |
3586 | (unspec:SVE_I | |
3587 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
3588 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
3589 | (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
06308276 | 3590 | UNSPEC_PRED_X)))] |
915d28fe RS |
3591 | "TARGET_SVE" |
3592 | "@ | |
3593 | msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3594 | mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3595 | movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
3596 | [(set_attr "movprfx" "*,*,yes")] | |
3597 | ) | |
3598 | ||
b6c3aea1 RS |
3599 | ;; Predicated integer subtraction of product with merging. |
;; Expander for the conditional form.  Under predicate operand 1 the result
;; is operand 4 - operand 2 * operand 3; operand 5 is the fallback value of
;; the UNSPEC_SEL and may be a register or zero.
;; aarch64_prepare_sve_cond_int_fma (called with MINUS) may emit the
;; complete expansion itself, in which case we are DONE.  Otherwise, if the
;; fallback equals the second multiplication input, the two inputs are
;; swapped so that the fallback is operand 2, the shape that
;; "*cond_fnma<mode>_2" matches via (match_dup 2).
3600 | (define_expand "cond_fnma<mode>" | |
3601 | [(set (match_operand:SVE_I 0 "register_operand") | |
3602 | (unspec:SVE_I | |
3603 | [(match_operand:<VPRED> 1 "register_operand") | |
3604 | (minus:SVE_I | |
3605 | (match_operand:SVE_I 4 "register_operand") | |
3606 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand") | |
3607 | (match_operand:SVE_I 3 "general_operand"))) | |
3608 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] | |
3609 | UNSPEC_SEL))] | |
3610 | "TARGET_SVE" | |
3611 | { | |
3612 | if (aarch64_prepare_sve_cond_int_fma (operands, MINUS)) | |
3613 | DONE; | |
3614 | /* Swap the multiplication operands if the fallback value is the | |
3615 | second of the two. */ | |
3616 | if (rtx_equal_p (operands[3], operands[5])) | |
3617 | std::swap (operands[2], operands[3]); | |
3618 | } | |
3619 | ) | |
3620 | ||
3621 | ;; Predicated integer subtraction of product, merging with the first input. | |
;; The fallback of the UNSPEC_SEL is (match_dup 2), i.e. the first
;; multiplication input.  Alternative 1 ties the output to operand 2 and
;; uses MSB directly; alternative 2 (earlyclobber output) first copies
;; operand 2 into the output with MOVPRFX and then uses MSB.
3622 | (define_insn "*cond_fnma<mode>_2" | |
3623 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
3624 | (unspec:SVE_I | |
3625 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
3626 | (minus:SVE_I | |
3627 | (match_operand:SVE_I 4 "register_operand" "w, w") | |
3628 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "0, w") | |
3629 | (match_operand:SVE_I 3 "register_operand" "w, w"))) | |
3630 | (match_dup 2)] | |
3631 | UNSPEC_SEL))] | |
3632 | "TARGET_SVE" | |
3633 | "@ | |
3634 | msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3635 | movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
3636 | [(set_attr "movprfx" "*,yes")] | |
3637 | ) | |
3638 | ||
3639 | ;; Predicated integer subtraction of product, merging with the third input. | |
;; The fallback of the UNSPEC_SEL is (match_dup 4), i.e. the minuend.
;; Alternative 1 ties the output to operand 4 and uses MLS directly;
;; alternative 2 (earlyclobber output) first copies operand 4 into the
;; output with MOVPRFX and then uses MLS.
3640 | (define_insn "*cond_fnma<mode>_4" | |
3641 | [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
3642 | (unspec:SVE_I | |
3643 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
3644 | (minus:SVE_I | |
3645 | (match_operand:SVE_I 4 "register_operand" "0, w") | |
3646 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w") | |
3647 | (match_operand:SVE_I 3 "register_operand" "w, w"))) | |
3648 | (match_dup 4)] | |
3649 | UNSPEC_SEL))] | |
3650 | "TARGET_SVE" | |
3651 | "@ | |
3652 | mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3653 | movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
3654 | [(set_attr "movprfx" "*,yes")] | |
3655 | ) | |
3656 | ||
3657 | ;; Predicated integer subtraction of product, merging with an | |
3658 | ;; independent value. | |
;; Operand 5 is a fallback that the insn condition requires to differ from
;; operands 2, 3 and 4.  The output is earlyclobber in every alternative:
;;   1-4. fallback is Dz (zero, going by the reg_or_zero predicate): a
;;        zeroing MOVPRFX sets up the output from operand 4 (alt 1) or from
;;        whichever input is tied to it (alts 2-4), followed by MLS or MSB.
;;   5.   fallback tied to the output: merging MOVPRFX from operand 4,
;;        then MLS.
;;   6.   fallback in an unrelated register: emitted as "#" and rewritten.
;; The rewrite runs after reload when operand 5 is a register other than
;; operand 0.  It emits a vcond_mask that selects between operand 4 and
;; operand 5 under predicate operand 1 into operand 0, then makes both
;; operand 4 and operand 5 refer to operand 0.
3659 | (define_insn_and_rewrite "*cond_fnma<mode>_any" | |
3660 | [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") | |
3661 | (unspec:SVE_I | |
3662 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") | |
3663 | (minus:SVE_I | |
3664 | (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w") | |
3665 | (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") | |
3666 | (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))) | |
3667 | (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] | |
3668 | UNSPEC_SEL))] | |
3669 | "TARGET_SVE | |
3670 | && !rtx_equal_p (operands[2], operands[5]) | |
3671 | && !rtx_equal_p (operands[3], operands[5]) | |
3672 | && !rtx_equal_p (operands[4], operands[5])" | |
3673 | "@ | |
3674 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3675 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3676 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3677 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> | |
3678 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3679 | #" | |
3680 | "&& reload_completed | |
3681 | && register_operand (operands[5], <MODE>mode) | |
3682 | && !rtx_equal_p (operands[0], operands[5])" | |
3683 | { | |
3684 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
3685 | operands[5], operands[1])); | |
3686 | operands[5] = operands[4] = operands[0]; | |
3687 | } | |
3688 | [(set_attr "movprfx" "yes")] | |
3689 | ) | |
3690 | ||
915d28fe RS |
3691 | ;; ------------------------------------------------------------------------- |
3692 | ;; ---- [INT] Dot product | |
3693 | ;; ------------------------------------------------------------------------- | |
3694 | ;; Includes: | |
3695 | ;; - SDOT | |
3696 | ;; - UDOT | |
3697 | ;; ------------------------------------------------------------------------- | |
3698 | ||
3699 | ;; Four-element integer dot-product with accumulation. | |
;; Operands 1 and 2 are the <VSI2QI> inputs of the DOTPROD unspec and
;; operand 3 is the SVE_SDI accumulator that the result is added to.
;; Alternative 1 ties the output to the accumulator; alternative 2
;; (earlyclobber output) first copies the accumulator into the output with
;; MOVPRFX.  The templates use "\t" throughout, matching the other
;; patterns in this section.
3700 | (define_insn "<sur>dot_prod<vsi2qi>" | |
a08acce8 | 3701 | [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") |
915d28fe RS |
3702 | (plus:SVE_SDI |
3703 | (unspec:SVE_SDI | |
3704 | [(match_operand:<VSI2QI> 1 "register_operand" "w, w") | |
3705 | (match_operand:<VSI2QI> 2 "register_operand" "w, w")] | |
3706 | DOTPROD) | |
3707 | (match_operand:SVE_SDI 3 "register_operand" "0, w")))] | |
a08acce8 RH |
3708 | "TARGET_SVE" |
3709 | "@ | |
915d28fe RS |
3710 | <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth> |
3711 | movprfx\t%0, %3\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>" | |
a08acce8 RH |
3712 | [(set_attr "movprfx" "*,yes")] |
3713 | ) | |
3714 | ||
915d28fe RS |
3715 | ;; ------------------------------------------------------------------------- |
3716 | ;; ---- [INT] Sum of absolute differences | |
3717 | ;; ------------------------------------------------------------------------- | |
3718 | ;; The patterns in this section are synthetic. | |
3719 | ;; ------------------------------------------------------------------------- | |
3720 | ||
3721 | ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in | |
3722 | ;; operands 1 and 2. The sequence also has to perform a widening reduction of | |
3723 | ;; the difference into a vector and accumulate that into operand 3 before | |
3724 | ;; copying that into the result operand 0. | |
3725 | ;; Perform that with a sequence of: | |
3726 | ;; MOV ones.b, #1 | |
3727 | ;; [SU]ABD diff.b, p0/m, op1.b, op2.b | |
3728 | ;; MOVPRFX op0, op3 // If necessary | |
3729 | ;; UDOT op0.s, diff.b, ones.b | |
;; The RTL template below only names the operands; the C body emits the
;; real instructions and finishes with DONE.  It:
;;   - forces a <VSI2QI> vector of ones (CONST1_RTX) into a register,
;;   - emits <sur>abd of operands 1 and 2 into a fresh "diff" register,
;;   - emits udot_prod of diff and ones, accumulating onto operand 3 and
;;     writing operand 0.
3730 | (define_expand "<sur>sad<vsi2qi>" | |
3731 | [(use (match_operand:SVE_SDI 0 "register_operand")) | |
3732 | (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand")) | |
3733 | (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL) | |
3734 | (use (match_operand:SVE_SDI 3 "register_operand"))] | |
3735 | "TARGET_SVE" | |
3736 | { | |
3737 | rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode)); | |
3738 | rtx diff = gen_reg_rtx (<VSI2QI>mode); | |
3739 | emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2])); | |
3740 | emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3])); | |
3741 | DONE; | |
3742 | } | |
3743 | ) | |
3744 | ||
3745 | ;; ------------------------------------------------------------------------- | |
3746 | ;; ---- [FP] General ternary arithmetic corresponding to unspecs | |
3747 | ;; ------------------------------------------------------------------------- | |
3748 | ;; Includes merging patterns for: | |
3749 | ;; - FMAD | |
3750 | ;; - FMLA | |
3751 | ;; - FMLS | |
3752 | ;; - FMSB | |
3753 | ;; - FNMAD | |
3754 | ;; - FNMLA | |
3755 | ;; - FNMLS | |
3756 | ;; - FNMSB | |
3757 | ;; ------------------------------------------------------------------------- | |
3758 | ||
0d80d083 RS |
3759 | ;; Unpredicated floating-point ternary operations. |
;; Expander for the SVE_COND_FP_TERNARY operations on SVE_F modes.
;; Operands 1, 2 and 3 are the three vector inputs, in optab order.  The
;; unspec is marked SVE_RELAXED_GP and its governing predicate, operand 4,
;; is set to the register returned by aarch64_ptrue_reg (presumably an
;; all-true predicate).
3760 | (define_expand "<optab><mode>4" | |
3761 | [(set (match_operand:SVE_F 0 "register_operand") | |
3762 | (unspec:SVE_F | |
3763 | [(match_dup 4) | |
c9c5a809 | 3764 | (const_int SVE_RELAXED_GP) |
0d80d083 RS |
3765 | (match_operand:SVE_F 1 "register_operand") |
3766 | (match_operand:SVE_F 2 "register_operand") | |
3767 | (match_operand:SVE_F 3 "register_operand")] | |
3768 | SVE_COND_FP_TERNARY))] | |
3769 | "TARGET_SVE" | |
3770 | { | |
3771 | operands[4] = aarch64_ptrue_reg (<VPRED>mode); | |
3772 | } | |
3773 | ) | |
3774 | ||
3775 | ;; Predicated floating-point ternary operations. | |
;; Operand 1 is the governing predicate and operand 5 the GP strictness
;; flag.  Operands 2 and 3 are the multiplication inputs ("%" marks them
;; as commutative) and operand 4 is the accumulator.  The alternatives are:
;;   1. output tied to the accumulator         -> <sve_fmla_op>
;;   2. output tied to a multiplication input  -> <sve_fmad_op>
;;   3. output untied (earlyclobber)           -> MOVPRFX from the
;;                                                accumulator, then
;;                                                <sve_fmla_op>
3776 | (define_insn "*<optab><mode>4" | |
3777 | [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
3778 | (unspec:SVE_F | |
3779 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
c9c5a809 | 3780 | (match_operand:SI 5 "aarch64_sve_gp_strictness") |
0d80d083 RS |
3781 | (match_operand:SVE_F 2 "register_operand" "%w, 0, w") |
3782 | (match_operand:SVE_F 3 "register_operand" "w, w, w") | |
3783 | (match_operand:SVE_F 4 "register_operand" "0, w, w")] | |
3784 | SVE_COND_FP_TERNARY))] | |
3785 | "TARGET_SVE" | |
3786 | "@ | |
3787 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
3788 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3789 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
3790 | [(set_attr "movprfx" "*,*,yes")] | |
3791 | ) | |
3792 | ||
915d28fe RS |
3793 | ;; Predicated floating-point ternary operations with merging. |
;; Expander for the conditional form.  The inner SVE_COND_FP_TERNARY unspec
;; reuses predicate operand 1 (match_dup 1) and is marked SVE_STRICT_GP.
;; Operand 5 is the fallback value of the outer UNSPEC_SEL and may be a
;; register or zero.  If the fallback equals the second multiplication
;; input, the two inputs are swapped so that the fallback is operand 2,
;; the shape that "*cond_<optab><mode>_2" matches via (match_dup 2).
3794 | (define_expand "cond_<optab><mode>" | |
3795 | [(set (match_operand:SVE_F 0 "register_operand") | |
3796 | (unspec:SVE_F | |
3797 | [(match_operand:<VPRED> 1 "register_operand") | |
3798 | (unspec:SVE_F | |
0d80d083 | 3799 | [(match_dup 1) |
c9c5a809 | 3800 | (const_int SVE_STRICT_GP) |
0d80d083 | 3801 | (match_operand:SVE_F 2 "register_operand") |
915d28fe RS |
3802 | (match_operand:SVE_F 3 "register_operand") |
3803 | (match_operand:SVE_F 4 "register_operand")] | |
3804 | SVE_COND_FP_TERNARY) | |
3805 | (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")] | |
3806 | UNSPEC_SEL))] | |
3807 | "TARGET_SVE" | |
3808 | { | |
3809 | /* Swap the multiplication operands if the fallback value is the | |
3810 | second of the two. */ | |
3811 | if (rtx_equal_p (operands[3], operands[5])) | |
3812 | std::swap (operands[2], operands[3]); | |
3813 | }) | |
3814 | ||
3815 | ;; Predicated floating-point ternary operations, merging with the | |
3816 | ;; first input. | |
;; The fallback of the UNSPEC_SEL is (match_dup 2), the first multiplication
;; input.  Operand 5, the predicate of the inner unspec, has no constraint
;; of its own; the insn condition only requires
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]).
;; Alternative 1 ties the output to operand 2 and uses <sve_fmad_op>;
;; alternative 2 (earlyclobber) copies operand 2 with MOVPRFX first.
;; The rewrite fires whenever operands 1 and 5 differ and replaces operand 5
;; with a copy of operand 1.
c9c5a809 | 3817 | (define_insn_and_rewrite "*cond_<optab><mode>_2" |
915d28fe RS |
3818 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
3819 | (unspec:SVE_F | |
a08acce8 | 3820 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
915d28fe | 3821 | (unspec:SVE_F |
c9c5a809 RS |
3822 | [(match_operand 5) |
3823 | (match_operand:SI 6 "aarch64_sve_gp_strictness") | |
0d80d083 | 3824 | (match_operand:SVE_F 2 "register_operand" "0, w") |
915d28fe RS |
3825 | (match_operand:SVE_F 3 "register_operand" "w, w") |
3826 | (match_operand:SVE_F 4 "register_operand" "w, w")] | |
3827 | SVE_COND_FP_TERNARY) | |
3828 | (match_dup 2)] | |
a08acce8 | 3829 | UNSPEC_SEL))] |
c9c5a809 | 3830 | "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" |
a08acce8 | 3831 | "@ |
915d28fe RS |
3832 | <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> |
3833 | movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
c9c5a809 RS |
3834 | "&& !rtx_equal_p (operands[1], operands[5])" |
3835 | { | |
3836 | operands[5] = copy_rtx (operands[1]); | |
3837 | } | |
a08acce8 RH |
3838 | [(set_attr "movprfx" "*,yes")] |
3839 | ) | |
3840 | ||
915d28fe RS |
3841 | ;; Predicated floating-point ternary operations, merging with the |
3842 | ;; third input. | |
;; The fallback of the UNSPEC_SEL is (match_dup 4), the accumulator.
;; Operand 5, the predicate of the inner unspec, has no constraint of its
;; own; the insn condition only requires
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]).
;; Alternative 1 ties the output to operand 4 and uses <sve_fmla_op>;
;; alternative 2 (earlyclobber) copies operand 4 with MOVPRFX first.
;; The rewrite fires whenever operands 1 and 5 differ and replaces operand 5
;; with a copy of operand 1.
c9c5a809 | 3843 | (define_insn_and_rewrite "*cond_<optab><mode>_4" |
915d28fe RS |
3844 | [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") |
3845 | (unspec:SVE_F | |
a08acce8 | 3846 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") |
915d28fe | 3847 | (unspec:SVE_F |
c9c5a809 RS |
3848 | [(match_operand 5) |
3849 | (match_operand:SI 6 "aarch64_sve_gp_strictness") | |
0d80d083 | 3850 | (match_operand:SVE_F 2 "register_operand" "w, w") |
915d28fe RS |
3851 | (match_operand:SVE_F 3 "register_operand" "w, w") |
3852 | (match_operand:SVE_F 4 "register_operand" "0, w")] | |
3853 | SVE_COND_FP_TERNARY) | |
3854 | (match_dup 4)] | |
a08acce8 | 3855 | UNSPEC_SEL))] |
c9c5a809 | 3856 | "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" |
a08acce8 | 3857 | "@ |
915d28fe RS |
3858 | <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
3859 | movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
c9c5a809 RS |
3860 | "&& !rtx_equal_p (operands[1], operands[5])" |
3861 | { | |
3862 | operands[5] = copy_rtx (operands[1]); | |
3863 | } | |
a08acce8 RH |
3864 | [(set_attr "movprfx" "*,yes")] |
3865 | ) | |
3866 | ||
915d28fe RS |
3867 | ;; Predicated floating-point ternary operations, merging with an |
3868 | ;; independent value. | |
;; Operand 5 is a fallback that the insn condition requires to differ from
;; operands 2, 3 and 4; operand 6 is the predicate of the inner unspec and
;; must satisfy aarch64_sve_pred_dominates_p with respect to operand 1.
;; The output is earlyclobber in every alternative:
;;   1-4. fallback is Dz (zero, going by the reg_or_zero predicate): a
;;        zeroing MOVPRFX sets up the output from operand 4 (alt 1) or from
;;        whichever input is tied to it (alts 2-4), followed by
;;        <sve_fmla_op> or <sve_fmad_op>.
;;   5.   fallback tied to the output: merging MOVPRFX from operand 4,
;;        then <sve_fmla_op>.
;;   6.   fallback in an unrelated register: emitted as "#" and rewritten.
;; The rewrite condition is always true ("&& 1"); the C body decides:
;;   - after reload, with a register fallback other than operand 0, it
;;     emits a vcond_mask selecting between operand 4 and operand 5 under
;;     operand 1 into operand 0 and makes operands 4 and 5 refer to
;;     operand 0;
;;   - otherwise, if operand 6 differs from operand 1, it replaces operand 6
;;     with a copy of operand 1;
;;   - otherwise it FAILs, leaving the insn unchanged.
f4fde1b3 | 3869 | (define_insn_and_rewrite "*cond_<optab><mode>_any" |
432b29c1 | 3870 | [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") |
915d28fe | 3871 | (unspec:SVE_F |
432b29c1 | 3872 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") |
915d28fe | 3873 | (unspec:SVE_F |
c9c5a809 RS |
3874 | [(match_operand 6) |
3875 | (match_operand:SI 7 "aarch64_sve_gp_strictness") | |
432b29c1 RS |
3876 | (match_operand:SVE_F 2 "register_operand" "w, w, 0, w, w, w") |
3877 | (match_operand:SVE_F 3 "register_operand" "w, w, w, 0, w, w") | |
3878 | (match_operand:SVE_F 4 "register_operand" "w, 0, w, w, w, w")] | |
915d28fe | 3879 | SVE_COND_FP_TERNARY) |
432b29c1 | 3880 | (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] |
0d2b3bca | 3881 | UNSPEC_SEL))] |
f4fde1b3 | 3882 | "TARGET_SVE |
915d28fe RS |
3883 | && !rtx_equal_p (operands[2], operands[5]) |
3884 | && !rtx_equal_p (operands[3], operands[5]) | |
c9c5a809 RS |
3885 | && !rtx_equal_p (operands[4], operands[5]) |
3886 | && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" | |
32cf949c | 3887 | "@ |
915d28fe | 3888 | movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
432b29c1 RS |
3889 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
3890 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
3891 | movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> | |
915d28fe | 3892 | movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> |
32cf949c | 3893 | #" |
c9c5a809 | 3894 | "&& 1" |
f4fde1b3 | 3895 | { |
c9c5a809 RS |
3896 | if (reload_completed |
3897 | && register_operand (operands[5], <MODE>mode) | |
3898 | && !rtx_equal_p (operands[0], operands[5])) | |
3899 | { | |
3900 | emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], | |
3901 | operands[5], operands[1])); | |
3902 | operands[5] = operands[4] = operands[0]; | |
3903 | } | |
3904 | else if (!rtx_equal_p (operands[1], operands[6])) | |
3905 | operands[6] = copy_rtx (operands[1]); | |
3906 | else | |
3907 | FAIL; | |
f4fde1b3 | 3908 | } |
32cf949c | 3909 | [(set_attr "movprfx" "yes")] |
0d2b3bca RS |
3910 | ) |
3911 | ||
915d28fe RS |
3912 | ;; ========================================================================= |
3913 | ;; == Comparisons and selects | |
3914 | ;; ========================================================================= | |
3915 | ||
3916 | ;; ------------------------------------------------------------------------- | |
3917 | ;; ---- [INT,FP] Select based on predicates | |
3918 | ;; ------------------------------------------------------------------------- | |
3919 | ;; Includes merging patterns for: | |
d29f7dd5 | 3920 | ;; - FMOV |
915d28fe RS |
3921 | ;; - MOV |
3922 | ;; - SEL | |
3923 | ;; ------------------------------------------------------------------------- | |
3924 | ||
3925 | ;; vcond_mask operand order: true, false, mask | |
3926 | ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) | |
3927 | ;; SEL operand order: mask, true, false | |
;; Expander: operand 0 = operand 3 ? operand 1 : operand 2, where operand 3
;; is the predicate.  Operand 1 may be a register or a duplicated
;; immediate; operand 2 may be a register or zero.  When operand 1 is a
;; register, operand 2 is forced into a register as well, which is the
;; combination the "*vcond_mask_<mode><vpred>" insn condition accepts.
d29f7dd5 RS |
3928 | (define_expand "vcond_mask_<mode><vpred>" |
3929 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
915d28fe | 3930 | (unspec:SVE_ALL |
d29f7dd5 RS |
3931 | [(match_operand:<VPRED> 3 "register_operand") |
3932 | (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") | |
3933 | (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] | |
915d28fe RS |
3934 | UNSPEC_SEL))] |
3935 | "TARGET_SVE" | |
d29f7dd5 RS |
3936 | { |
3937 | if (register_operand (operands[1], <MODE>mode)) | |
3938 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
3939 | } | |
915d28fe RS |
3940 | ) |
3941 | ||
d29f7dd5 RS |
3942 | ;; Selects between: |
3943 | ;; - two registers | |
3944 | ;; - a duplicated immediate and a register | |
3945 | ;; - a duplicated immediate and zero | |
;; The insn condition rejects a register operand 1 paired with a
;; non-register operand 2.  The seven alternatives are, in order:
;;   1. register / register                     -> SEL
;;   2. "vss" immediate, fallback tied to output -> merging MOV
;;   3. "vss" immediate, zero fallback           -> zeroing MOV
;;   4. "Ufc" immediate, fallback tied to output -> merging FMOV
;;   5. "Ufc" immediate, zero fallback           -> zeroing MOVPRFX of the
;;                                                  output, then merging FMOV
;;   6. "vss" immediate, untied register fallback -> MOVPRFX from operand 2,
;;                                                  then merging MOV
;;   7. "Ufc" immediate, untied register fallback -> MOVPRFX from operand 2,
;;                                                  then merging FMOV
3946 | (define_insn "*vcond_mask_<mode><vpred>" | |
3947 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w") | |
3948 | (unspec:SVE_ALL | |
3949 | [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl") | |
3950 | (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc") | |
3951 | (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")] | |
915d28fe | 3952 | UNSPEC_SEL))] |
d29f7dd5 RS |
3953 | "TARGET_SVE |
3954 | && (!register_operand (operands[1], <MODE>mode) | |
3955 | || register_operand (operands[2], <MODE>mode))" | |
3956 | "@ | |
3957 | sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype> | |
3958 | mov\t%0.<Vetype>, %3/m, #%I1 | |
3959 | mov\t%0.<Vetype>, %3/z, #%I1 | |
3960 | fmov\t%0.<Vetype>, %3/m, #%1 | |
3961 | movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1 | |
3962 | movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1 | |
3963 | movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1" | |
3964 | [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] | |
43cacb12 RS |
3965 | ) |
3966 | ||
88a37c4d RS |
3967 | ;; Optimize selects between a duplicated scalar variable and another vector, |
3968 | ;; the latter of which can be a zero constant or a variable. Treat duplicates | |
3969 | ;; of GPRs as being more expensive than duplicates of FPRs, since they | |
3970 | ;; involve a cross-file move. | |
;; Operand 1 is the scalar being duplicated ("r" or "w" register), operand 2
;; the other vector and operand 3 the predicate.  The alternatives come in
;; r/w pairs:
;;   1-2. operand 2 tied to the output -> merging MOV
;;   3-4. operand 2 is zero (Dz)       -> zeroing MOVPRFX of the output,
;;                                        then merging MOV
;;   5-6. operand 2 in another register -> MOVPRFX from operand 2, then
;;                                        merging MOV
;; In each pair the "r" alternative carries one more "?" than the "w" one.
3971 | (define_insn "*aarch64_sel_dup<mode>" | |
3972 | [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") | |
3973 | (unspec:SVE_ALL | |
3974 | [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl") | |
3975 | (vec_duplicate:SVE_ALL | |
3976 | (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w")) | |
3977 | (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] | |
3978 | UNSPEC_SEL))] | |
3979 | "TARGET_SVE" | |
3980 | "@ | |
3981 | mov\t%0.<Vetype>, %3/m, %<vwcore>1 | |
3982 | mov\t%0.<Vetype>, %3/m, %<Vetype>1 | |
3983 | movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1 | |
3984 | movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1 | |
3985 | movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1 | |
3986 | movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1" | |
3987 | [(set_attr "movprfx" "*,*,yes,yes,yes,yes")] | |
3988 | ) | |
3989 | ||
915d28fe RS |
3990 | ;; ------------------------------------------------------------------------- |
3991 | ;; ---- [INT,FP] Compare and select | |
3992 | ;; ------------------------------------------------------------------------- | |
3993 | ;; The patterns in this section are synthetic. | |
3994 | ;; ------------------------------------------------------------------------- | |
43cacb12 | 3995 | |
915d28fe RS |
3996 | ;; Integer (signed) vcond. Don't enforce an immediate range here, since it |
3997 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
;; Operand 3 is the comparison, applied to operands 4 and 5 in
;; <V_INT_EQUIV> mode; operands 1 and 2 are the values selected between.
;; All of the work is delegated to aarch64_expand_sve_vcond, after which
;; the expansion is DONE.
3998 | (define_expand "vcond<mode><v_int_equiv>" | |
3999 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
4000 | (if_then_else:SVE_ALL | |
4001 | (match_operator 3 "comparison_operator" | |
4002 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
4003 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
d29f7dd5 RS |
4004 | (match_operand:SVE_ALL 1 "nonmemory_operand") |
4005 | (match_operand:SVE_ALL 2 "nonmemory_operand")))] | |
898f07b0 RS |
4006 | "TARGET_SVE" |
4007 | { | |
915d28fe RS |
4008 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); |
4009 | DONE; | |
898f07b0 RS |
4010 | } |
4011 | ) | |
4012 | ||
915d28fe RS |
4013 | ;; Integer vcondu. Don't enforce an immediate range here, since it |
4014 | ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
;; Same operand layout as "vcond<mode><v_int_equiv>": operand 3 is the
;; comparison of operands 4 and 5 in <V_INT_EQUIV> mode, and operands 1
;; and 2 are the values selected between.  All of the work is delegated to
;; aarch64_expand_sve_vcond, after which the expansion is DONE.
4015 | (define_expand "vcondu<mode><v_int_equiv>" | |
4016 | [(set (match_operand:SVE_ALL 0 "register_operand") | |
4017 | (if_then_else:SVE_ALL | |
4018 | (match_operator 3 "comparison_operator" | |
4019 | [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
4020 | (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
d29f7dd5 RS |
4021 | (match_operand:SVE_ALL 1 "nonmemory_operand") |
4022 | (match_operand:SVE_ALL 2 "nonmemory_operand")))] | |
898f07b0 | 4023 | "TARGET_SVE" |
915d28fe RS |
4024 | { |
4025 | aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
4026 | DONE; | |
4027 | } | |
898f07b0 RS |
4028 | ) |
4029 | ||
915d28fe RS |
4030 | ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand; |
4031 | ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero. | |
;; Operand 3 is the comparison, applied to operands 4 and 5 in
;; <V_FP_EQUIV> mode (operand 5 may be a register or zero); operands 1 and
;; 2 are the SVE_HSD values selected between.  All of the work is delegated
;; to aarch64_expand_sve_vcond, after which the expansion is DONE.
4032 | (define_expand "vcond<mode><v_fp_equiv>" | |
a70965b1 RS |
4033 | [(set (match_operand:SVE_HSD 0 "register_operand") |
4034 | (if_then_else:SVE_HSD | |
915d28fe RS |
4035 | (match_operator 3 "comparison_operator" |
4036 | [(match_operand:<V_FP_EQUIV> 4 "register_operand") | |
4037 | (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) | |
d29f7dd5 RS |
4038 | (match_operand:SVE_HSD 1 "nonmemory_operand") |
4039 | (match_operand:SVE_HSD 2 "nonmemory_operand")))] | |
b781a135 RS |
4040 | "TARGET_SVE" |
4041 | { | |
915d28fe RS |
4042 | aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); |
4043 | DONE; | |
b781a135 RS |
4044 | } |
4045 | ) | |
4046 | ||
915d28fe RS |
4047 | ;; ------------------------------------------------------------------------- |
4048 | ;; ---- [INT] Comparisons | |
4049 | ;; ------------------------------------------------------------------------- | |
4050 | ;; Includes: | |
4051 | ;; - CMPEQ | |
4052 | ;; - CMPGE | |
4053 | ;; - CMPGT | |
4054 | ;; - CMPHI | |
4055 | ;; - CMPHS | |
4056 | ;; - CMPLE | |
4057 | ;; - CMPLO | |
4058 | ;; - CMPLS | |
4059 | ;; - CMPLT | |
4060 | ;; - CMPNE | |
4061 | ;; ------------------------------------------------------------------------- | |
b781a135 | 4062 | |
915d28fe RS |
4063 | ;; Signed integer comparisons. Don't enforce an immediate range here, since |
4064 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
4065 | ;; instead. | |
;; Operand 0 is the predicate result, operand 1 the comparison operator and
;; operands 2 and 3 the SVE_I values compared.  The template also records a
;; clobber of the CC_NZC flags register.  The expansion itself is done by
;; aarch64_expand_sve_vec_cmp_int, which is passed the rtx code of
;; operand 1; the expander then finishes with DONE.
4066 | (define_expand "vec_cmp<mode><vpred>" | |
4067 | [(parallel | |
4068 | [(set (match_operand:<VPRED> 0 "register_operand") | |
4069 | (match_operator:<VPRED> 1 "comparison_operator" | |
4070 | [(match_operand:SVE_I 2 "register_operand") | |
4071 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
4072 | (clobber (reg:CC_NZC CC_REGNUM))])] | |
b781a135 | 4073 | "TARGET_SVE" |
915d28fe RS |
4074 | { |
4075 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
4076 | operands[2], operands[3]); | |
4077 | DONE; | |
4078 | } | |
b781a135 RS |
4079 | ) |
4080 | ||
915d28fe RS |
4081 | ;; Unsigned integer comparisons. Don't enforce an immediate range here, since |
4082 | ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
4083 | ;; instead. | |
;; Same operand layout as "vec_cmp<mode><vpred>": operand 0 is the
;; predicate result, operand 1 the comparison operator and operands 2 and 3
;; the SVE_I values compared, with a clobber of the CC_NZC flags register.
;; The expansion itself is done by aarch64_expand_sve_vec_cmp_int, which is
;; passed the rtx code of operand 1; the expander then finishes with DONE.
4084 | (define_expand "vec_cmpu<mode><vpred>" | |
4085 | [(parallel | |
4086 | [(set (match_operand:<VPRED> 0 "register_operand") | |
4087 | (match_operator:<VPRED> 1 "comparison_operator" | |
4088 | [(match_operand:SVE_I 2 "register_operand") | |
4089 | (match_operand:SVE_I 3 "nonmemory_operand")])) | |
4090 | (clobber (reg:CC_NZC CC_REGNUM))])] | |
43cacb12 RS |
4091 | "TARGET_SVE" |
4092 | { | |
915d28fe RS |
4093 | aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), |
4094 | operands[2], operands[3]); | |
4095 | DONE; | |
43cacb12 RS |
4096 | } |
4097 | ) | |
4098 | ||
00fa90d9 RS |
4099 | ;; Predicated integer comparisons. |
;; Operand 0 is the predicate result, operand 1 the governing predicate and
;; operand 2 the ptrue flag of the UNSPEC_PRED_Z.  Operands 3 and 4 are the
;; values compared.  Alternative 1 takes operand 4 as an immediate matching
;; <sve_imm_con>; alternative 2 takes it in a vector register.  Both use
;; zeroing predication (%1/z) and the pattern clobbers the CC_NZC flags.
4100 | (define_insn "@aarch64_pred_cmp<cmp_op><mode>" | |
915d28fe RS |
4101 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
4102 | (unspec:<VPRED> | |
4103 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
00fa90d9 | 4104 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") |
915d28fe | 4105 | (SVE_INT_CMP:<VPRED> |
00fa90d9 RS |
4106 | (match_operand:SVE_I 3 "register_operand" "w, w") |
4107 | (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
4108 | UNSPEC_PRED_Z)) | |
915d28fe | 4109 | (clobber (reg:CC_NZC CC_REGNUM))] |
43cacb12 RS |
4110 | "TARGET_SVE" |
4111 | "@ | |
00fa90d9 RS |
4112 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4 |
4113 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" | |
43cacb12 RS |
4114 | ) |
4115 | ||
00fa90d9 RS |
4116 | ;; Predicated integer comparisons in which both the flag and predicate |
4117 | ;; results are interesting. | |
;; The first set describes the flags as a PTEST of the comparison result,
;; the second set stores the comparison result itself in operand 0.
;; The insn condition requires aarch64_sve_same_pred_for_ptest_p to accept
;; operands 4 and 6.  If those two operands are not already rtx_equal_p, the
;; rewrite step copies operand 4 into operand 6 and operand 5 into operand 7.
4118 | (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc" | |
915d28fe RS |
4119 | [(set (reg:CC_NZC CC_REGNUM) |
4120 | (unspec:CC_NZC | |
34467289 RS |
4121 | [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") |
4122 | (match_operand 4) | |
4123 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
915d28fe | 4124 | (unspec:<VPRED> |
00fa90d9 RS |
4125 | [(match_operand 6) |
4126 | (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
915d28fe RS |
4127 | (SVE_INT_CMP:<VPRED> |
4128 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
4129 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
00fa90d9 | 4130 | UNSPEC_PRED_Z)] |
34467289 | 4131 | UNSPEC_PTEST)) |
915d28fe RS |
4132 | (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
4133 | (unspec:<VPRED> | |
00fa90d9 RS |
4134 | [(match_dup 6) |
4135 | (match_dup 7) | |
915d28fe RS |
4136 | (SVE_INT_CMP:<VPRED> |
4137 | (match_dup 2) | |
4138 | (match_dup 3))] | |
00fa90d9 RS |
4139 | UNSPEC_PRED_Z))] |
4140 | "TARGET_SVE | |
4141 | && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
915d28fe RS |
4142 | "@ |
4143 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
4144 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
00fa90d9 RS |
4145 | "&& !rtx_equal_p (operands[4], operands[6])" |
4146 | { | |
4147 | operands[6] = copy_rtx (operands[4]); | |
4148 | operands[7] = operands[5]; | |
4149 | } | |
43cacb12 RS |
4150 | ) |
4151 | ||
00fa90d9 RS |
4152 | ;; Predicated integer comparisons in which only the flags result is |
4153 | ;; interesting. | |
;; Operand 0 is a clobbered scratch predicate register: the instruction
;; still writes a predicate, but the RTL only records the flags result.
;; The insn condition requires aarch64_sve_same_pred_for_ptest_p to accept
;; operands 4 and 6.  If those two operands are not already rtx_equal_p, the
;; rewrite step copies operand 4 into operand 6 and operand 5 into operand 7.
4154 | (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" | |
915d28fe RS |
4155 | [(set (reg:CC_NZC CC_REGNUM) |
4156 | (unspec:CC_NZC | |
34467289 RS |
4157 | [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") |
4158 | (match_operand 4) | |
4159 | (match_operand:SI 5 "aarch64_sve_ptrue_flag") | |
915d28fe | 4160 | (unspec:<VPRED> |
00fa90d9 RS |
4161 | [(match_operand 6) |
4162 | (match_operand:SI 7 "aarch64_sve_ptrue_flag") | |
915d28fe RS |
4163 | (SVE_INT_CMP:<VPRED> |
4164 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
4165 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
00fa90d9 | 4166 | UNSPEC_PRED_Z)] |
34467289 | 4167 | UNSPEC_PTEST)) |
915d28fe | 4168 | (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] |
00fa90d9 RS |
4169 | "TARGET_SVE |
4170 | && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" | |
43cacb12 | 4171 | "@ |
915d28fe RS |
4172 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 |
4173 | cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
00fa90d9 RS |
4174 | "&& !rtx_equal_p (operands[4], operands[6])" |
4175 | { | |
4176 | operands[6] = copy_rtx (operands[4]); | |
4177 | operands[7] = operands[5]; | |
4178 | } | |
43cacb12 RS |
4179 | ) |
4180 | ||
915d28fe RS |
4181 | ;; Predicated integer comparisons, formed by combining a PTRUE-predicated |
4182 | ;; comparison with an AND. Split the instruction into its preferred form | |
4183 | ;; at the earliest opportunity, in order to get rid of the redundant |
4184 | ;; operand 4. | |
;; The insn has no assembly of its own and is always split.  The split
;; drops the AND and operand 4, makes operand 1 the governing predicate of
;; the comparison and changes the ptrue flag from SVE_KNOWN_PTRUE to
;; SVE_MAYBE_NOT_PTRUE.  The flags clobber is kept.
4185 | (define_insn_and_split "*cmp<cmp_op><mode>_and" | |
915d28fe | 4186 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") |
00fa90d9 RS |
4187 | (and:<VPRED> |
4188 | (unspec:<VPRED> | |
4189 | [(match_operand 4) | |
4190 | (const_int SVE_KNOWN_PTRUE) | |
4191 | (SVE_INT_CMP:<VPRED> | |
4192 | (match_operand:SVE_I 2 "register_operand" "w, w") | |
4193 | (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
4194 | UNSPEC_PRED_Z) | |
4195 | (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) | |
915d28fe RS |
4196 | (clobber (reg:CC_NZC CC_REGNUM))] |
4197 | "TARGET_SVE" | |
4198 | "#" | |
4199 | "&& 1" | |
4200 | [(parallel | |
4201 | [(set (match_dup 0) | |
00fa90d9 RS |
4202 | (unspec:<VPRED> |
4203 | [(match_dup 1) | |
4204 | (const_int SVE_MAYBE_NOT_PTRUE) | |
4205 | (SVE_INT_CMP:<VPRED> | |
4206 | (match_dup 2) | |
4207 | (match_dup 3))] | |
4208 | UNSPEC_PRED_Z)) | |
915d28fe | 4209 | (clobber (reg:CC_NZC CC_REGNUM))])] |
43cacb12 RS |
4210 | ) |
4211 | ||
915d28fe RS |
4212 | ;; ------------------------------------------------------------------------- |
4213 | ;; ---- [INT] While tests | |
4214 | ;; ------------------------------------------------------------------------- | |
4215 | ;; Includes: | |
4216 | ;; - WHILELO | |
4217 | ;; ------------------------------------------------------------------------- | |
740c1ed7 | 4218 | |
915d28fe RS |
4219 | ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I], |
4220 | ;; with the comparison being unsigned. | |
;; Operands 1 and 2 are scalars of the GPI mode and may each be a general
;; register or zero.  Operand 0 is the predicate result.  The flags register
;; is clobbered.
0b1fe8cf | 4221 | (define_insn "@while_ult<GPI:mode><PRED_ALL:mode>" |
915d28fe RS |
4222 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
4223 | (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") | |
4224 | (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
4225 | UNSPEC_WHILE_LO)) | |
4226 | (clobber (reg:CC_NZC CC_REGNUM))] | |
43cacb12 | 4227 | "TARGET_SVE" |
915d28fe | 4228 | "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" |
43cacb12 RS |
4229 | ) |
4230 | ||
915d28fe | 4231 | ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. |
34467289 RS |
4232 | ;; Handle the case in which both results are useful. The GP operands |
4233 | ;; to the PTEST aren't needed, so we allow them to be anything. | |
;; Operands 3 and 4 are those unconstrained GP operands.  The rewrite step
;; runs while either of them is still non-constant and replaces operand 3
;; with CONSTM1_RTX of VNx16BImode and operand 4 with CONSTM1_RTX of the
;; predicate mode.
915d28fe RS |
4234 | (define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc" |
4235 | [(set (reg:CC_NZC CC_REGNUM) | |
4236 | (unspec:CC_NZC | |
34467289 RS |
4237 | [(match_operand 3) |
4238 | (match_operand 4) | |
4239 | (const_int SVE_KNOWN_PTRUE) | |
915d28fe | 4240 | (unspec:PRED_ALL |
34467289 RS |
4241 | [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") |
4242 | (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
915d28fe | 4243 | UNSPEC_WHILE_LO)] |
34467289 | 4244 | UNSPEC_PTEST)) |
915d28fe | 4245 | (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
34467289 RS |
4246 | (unspec:PRED_ALL [(match_dup 1) |
4247 | (match_dup 2)] | |
915d28fe | 4248 | UNSPEC_WHILE_LO))] |
43cacb12 | 4249 | "TARGET_SVE" |
34467289 | 4250 | "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" |
915d28fe RS |
4251 | ;; Force the compiler to drop the unused predicate operand, so that we |
4252 | ;; don't have an unnecessary PTRUE. | |
34467289 | 4253 | "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" |
915d28fe | 4254 | { |
34467289 RS |
4255 | operands[3] = CONSTM1_RTX (VNx16BImode); |
4256 | operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); | |
915d28fe | 4257 | } |
43cacb12 RS |
4258 | ) |
4259 | ||
915d28fe | 4260 | ;; ------------------------------------------------------------------------- |
42b4e87d | 4261 | ;; ---- [FP] Direct comparisons |
915d28fe RS |
4262 | ;; ------------------------------------------------------------------------- |
4263 | ;; Includes: | |
4264 | ;; - FCMEQ | |
4265 | ;; - FCMGE | |
4266 | ;; - FCMGT | |
4267 | ;; - FCMLE | |
4268 | ;; - FCMLT | |
4269 | ;; - FCMNE | |
4270 | ;; - FCMUO | |
4271 | ;; ------------------------------------------------------------------------- | |
4272 | ||
4273 | ;; Floating-point comparisons. All comparisons except FCMUO allow a zero | |
4274 | ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO | |
4275 | ;; with zero. | |
;; Operand 0 is the predicate result, operand 1 the comparison operator and
;; operands 2 and 3 the values being compared.  The helper is called with
;; a final argument of false; NOTE(review): the meaning of that flag is
;; defined by the helper and is not visible here.
4276 | (define_expand "vec_cmp<mode><vpred>" | |
4277 | [(set (match_operand:<VPRED> 0 "register_operand") | |
4278 | (match_operator:<VPRED> 1 "comparison_operator" | |
4279 | [(match_operand:SVE_F 2 "register_operand") | |
4280 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] | |
43cacb12 RS |
4281 | "TARGET_SVE" |
4282 | { | |
915d28fe RS |
4283 | aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), |
4284 | operands[2], operands[3], false); | |
4285 | DONE; | |
43cacb12 RS |
4286 | } |
4287 | ) | |
4288 | ||
4a942af6 | 4289 | ;; Predicated floating-point comparisons. |
;; Operand 1 is the governing predicate and operand 4 is its ptrue flag.
;; Operand 2 is always a vector register.  In the first alternative operand 3
;; is the zero constant and the instruction compares against an immediate
;; 0.0; in the second alternative operand 3 is a vector register.
915d28fe RS |
4290 | (define_insn "*fcm<cmp_op><mode>" |
4291 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
4292 | (unspec:<VPRED> | |
4293 | [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
4a942af6 RS |
4294 | (match_operand:SI 4 "aarch64_sve_ptrue_flag") |
4295 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
4296 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] | |
4297 | SVE_COND_FP_CMP_I0))] | |
43cacb12 RS |
4298 | "TARGET_SVE" |
4299 | "@ | |
915d28fe RS |
4300 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 |
4301 | fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
43cacb12 RS |
4302 | ) |
4303 | ||
915d28fe RS |
4304 | ;; Same for unordered comparisons. |
;; Operand 1 is the governing predicate and operand 4 is its ptrue flag.
;; Both compared operands (2 and 3) must be vector registers, so this
;; pattern has a single alternative and no compare-with-zero form.
4305 | (define_insn "*fcmuo<mode>" | |
4306 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
4307 | (unspec:<VPRED> | |
4308 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
4a942af6 RS |
4309 | (match_operand:SI 4 "aarch64_sve_ptrue_flag") |
4310 | (match_operand:SVE_F 2 "register_operand" "w") | |
4311 | (match_operand:SVE_F 3 "register_operand" "w")] | |
4312 | UNSPEC_COND_FCMUO))] | |
43cacb12 | 4313 | "TARGET_SVE" |
915d28fe | 4314 | "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" |
43cacb12 RS |
4315 | ) |
4316 | ||
915d28fe RS |
4317 | ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed |
4318 | ;; with another predicate P. This does not have the same trapping behavior | |
4319 | ;; as predicating the comparison itself on P, but it's a legitimate fold, | |
4320 | ;; since we can drop any potentially-trapping operations whose results | |
4321 | ;; are not needed. | |
4322 | ;; | |
4323 | ;; Split the instruction into its preferred form (below) at the earliest | |
4324 | ;; opportunity, in order to get rid of the redundant operand 1. | |
;;
;; In the RTL, P is operand 4.  After the split, operand 4 governs the
;; comparison directly and the ptrue flag changes from SVE_KNOWN_PTRUE to
;; SVE_MAYBE_NOT_PTRUE.
4325 | (define_insn_and_split "*fcm<cmp_op><mode>_and_combine" | |
4326 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
4327 | (and:<VPRED> | |
4328 | (unspec:<VPRED> | |
4329 | [(match_operand:<VPRED> 1) | |
4a942af6 RS |
4330 | (const_int SVE_KNOWN_PTRUE) |
4331 | (match_operand:SVE_F 2 "register_operand" "w, w") | |
4332 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] | |
4333 | SVE_COND_FP_CMP_I0) | |
915d28fe | 4334 | (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] |
43cacb12 | 4335 | "TARGET_SVE" |
915d28fe RS |
4336 | "#" |
4337 | "&& 1" | |
4338 | [(set (match_dup 0) | |
4a942af6 RS |
4339 | (unspec:<VPRED> |
4340 | [(match_dup 4) | |
4341 | (const_int SVE_MAYBE_NOT_PTRUE) | |
4342 | (match_dup 2) | |
4343 | (match_dup 3)] | |
4344 | SVE_COND_FP_CMP_I0))] | |
43cacb12 RS |
4345 | ) |
4346 | ||
915d28fe RS |
4347 | ;; Same for unordered comparisons. |
;; The insn is always split.  The split removes the AND and operand 1,
;; makes operand 4 the governing predicate of the FCMUO and changes the
;; ptrue flag from SVE_KNOWN_PTRUE to SVE_MAYBE_NOT_PTRUE.
4348 | (define_insn_and_split "*fcmuo<mode>_and_combine" | |
4349 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
4350 | (and:<VPRED> | |
4351 | (unspec:<VPRED> | |
4352 | [(match_operand:<VPRED> 1) | |
4a942af6 RS |
4353 | (const_int SVE_KNOWN_PTRUE) |
4354 | (match_operand:SVE_F 2 "register_operand" "w") | |
4355 | (match_operand:SVE_F 3 "register_operand" "w")] | |
4356 | UNSPEC_COND_FCMUO) | |
915d28fe | 4357 | (match_operand:<VPRED> 4 "register_operand" "Upl")))] |
43cacb12 | 4358 | "TARGET_SVE" |
915d28fe RS |
4359 | "#" |
4360 | "&& 1" | |
4361 | [(set (match_dup 0) | |
915d28fe | 4362 | (unspec:<VPRED> |
4a942af6 RS |
4363 | [(match_dup 4) |
4364 | (const_int SVE_MAYBE_NOT_PTRUE) | |
4365 | (match_dup 2) | |
4366 | (match_dup 3)] | |
4367 | UNSPEC_COND_FCMUO))] | |
43cacb12 RS |
4368 | ) |
4369 | ||
42b4e87d RS |
4370 | ;; ------------------------------------------------------------------------- |
4371 | ;; ---- [FP] Absolute comparisons | |
4372 | ;; ------------------------------------------------------------------------- | |
4373 | ;; Includes: | |
4374 | ;; - FACGE | |
4375 | ;; - FACGT | |
4376 | ;; - FACLE | |
4377 | ;; - FACLT | |
4378 | ;; ------------------------------------------------------------------------- | |
4379 | ||
4380 | ;; Predicated floating-point absolute comparisons. | |
;; Each input (operands 2 and 3) is wrapped in its own UNSPEC_COND_FABS,
;; with predicates in operands 5 and 7 and strictness flags in operands 6
;; and 8.  The insn condition requires aarch64_sve_pred_dominates_p to hold
;; for operand 5 and for operand 7 with respect to operand 1.  If either
;; inner predicate is not already rtx_equal_p to operand 1, the rewrite
;; step replaces both with copies of operand 1.
4381 | (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>" | |
4382 | [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
4383 | (unspec:<VPRED> | |
4384 | [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
4385 | (match_operand:SI 4 "aarch64_sve_ptrue_flag") | |
4386 | (unspec:SVE_F | |
4387 | [(match_operand 5) | |
4388 | (match_operand:SI 6 "aarch64_sve_gp_strictness") | |
4389 | (match_operand:SVE_F 2 "register_operand" "w")] | |
4390 | UNSPEC_COND_FABS) | |
4391 | (unspec:SVE_F | |
4392 | [(match_operand 7) | |
4393 | (match_operand:SI 8 "aarch64_sve_gp_strictness") | |
4394 | (match_operand:SVE_F 3 "register_operand" "w")] | |
4395 | UNSPEC_COND_FABS)] | |
4396 | SVE_COND_FP_ABS_CMP))] | |
4397 | "TARGET_SVE | |
4398 | && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) | |
4399 | && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" | |
4400 | "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
4401 | "&& (!rtx_equal_p (operands[1], operands[5]) | |
4402 | || !rtx_equal_p (operands[1], operands[7]))" | |
4403 | { | |
4404 | operands[5] = copy_rtx (operands[1]); | |
4405 | operands[7] = copy_rtx (operands[1]); | |
4406 | } | |
4407 | ) | |
4408 | ||
915d28fe RS |
4409 | ;; ------------------------------------------------------------------------- |
4410 | ;; ---- [PRED] Test bits | |
4411 | ;; ------------------------------------------------------------------------- | |
4412 | ;; Includes: | |
4413 | ;; - PTEST | |
4414 | ;; ------------------------------------------------------------------------- | |
4415 | ||
4416 | ;; Branch based on predicate equality or inequality. | |
;; The C body obtains a predicate from aarch64_ptrue_all and forces it into
;; a register.  If operand 2 is the zero constant, operand 1 is tested
;; directly; otherwise gen_aarch64_pred_xor<mode>_z combines operands 1 and
;; 2 into a fresh register and that register is tested instead.  The test
;; itself is an aarch64_ptest insn with the SVE_KNOWN_PTRUE flag.  Finally
;; operands 1 and 2 are replaced by the flags register and const0_rtx, so
;; the branch in the template compares the flags with zero.
4417 | (define_expand "cbranch<mode>4" | |
4418 | [(set (pc) | |
4419 | (if_then_else | |
4420 | (match_operator 0 "aarch64_equality_operator" | |
4421 | [(match_operand:PRED_ALL 1 "register_operand") | |
4422 | (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) | |
4423 | (label_ref (match_operand 3 "")) | |
4424 | (pc)))] | |
4425 | "" | |
43cacb12 | 4426 | { |
34467289 RS |
4427 | rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>)); |
4428 | rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue); | |
4429 | rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); | |
915d28fe RS |
4430 | rtx pred; |
4431 | if (operands[2] == CONST0_RTX (<MODE>mode)) | |
4432 | pred = operands[1]; | |
4433 | else | |
4434 | { | |
4435 | pred = gen_reg_rtx (<MODE>mode); | |
34467289 RS |
4436 | emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1], |
4437 | operands[2])); | |
915d28fe | 4438 | } |
34467289 | 4439 | emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred)); |
915d28fe RS |
4440 | operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); |
4441 | operands[2] = const0_rtx; | |
43cacb12 RS |
4442 | } |
4443 | ) | |
4444 | ||
34467289 RS |
4445 | ;; See "Description of UNSPEC_PTEST" above for details. |
;; Only operand 0 (the VNx16BI governing predicate) and operand 3 (the
;; predicate being tested) appear in the assembly output.  Operands 1 and 2
;; are carried in the RTL only.  The tested predicate is always printed
;; with the byte suffix, whatever its mode.
4446 | (define_insn "aarch64_ptest<mode>" | |
915d28fe | 4447 | [(set (reg:CC_NZC CC_REGNUM) |
34467289 RS |
4448 | (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") |
4449 | (match_operand 1) | |
4450 | (match_operand:SI 2 "aarch64_sve_ptrue_flag") | |
4451 | (match_operand:PRED_ALL 3 "register_operand" "Upa")] | |
4452 | UNSPEC_PTEST))] | |
43cacb12 | 4453 | "TARGET_SVE" |
34467289 | 4454 | "ptest\t%0, %3.b" |
43cacb12 RS |
4455 | ) |
4456 | ||
915d28fe RS |
4457 | ;; ========================================================================= |
4458 | ;; == Reductions | |
4459 | ;; ========================================================================= | |
4460 | ||
4461 | ;; ------------------------------------------------------------------------- | |
4462 | ;; ---- [INT,FP] Conditional reductions | |
4463 | ;; ------------------------------------------------------------------------- | |
4464 | ;; Includes: | |
4465 | ;; - CLASTB | |
4466 | ;; ------------------------------------------------------------------------- | |
4467 | ||
4468 | ;; Set operand 0 to the last active element in operand 3, or to tied | |
4469 | ;; operand 1 if no elements are active. | |
;; Operand 2 is the predicate that selects the active elements.  The first
;; alternative keeps the scalar in a general register (and is marked as
;; slightly disparaged); the second keeps it in a vector register.
4470 | (define_insn "fold_extract_last_<mode>" | |
801790b3 | 4471 | [(set (match_operand:<VEL> 0 "register_operand" "=?r, w") |
915d28fe RS |
4472 | (unspec:<VEL> |
4473 | [(match_operand:<VEL> 1 "register_operand" "0, 0") | |
4474 | (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") | |
4475 | (match_operand:SVE_ALL 3 "register_operand" "w, w")] | |
4476 | UNSPEC_CLASTB))] | |
3db85990 | 4477 | "TARGET_SVE" |
915d28fe RS |
4478 | "@ |
4479 | clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype> | |
801790b3 | 4480 | clastb\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>" |
3db85990 ST |
4481 | ) |
4482 | ||
915d28fe RS |
4483 | ;; ------------------------------------------------------------------------- |
4484 | ;; ---- [INT] Tree reductions | |
4485 | ;; ------------------------------------------------------------------------- | |
4486 | ;; Includes: | |
4487 | ;; - ANDV | |
4488 | ;; - EORV | |
4489 | ;; - ORV | |
4490 | ;; - SMAXV | |
4491 | ;; - SMINV | |
4492 | ;; - UADDV | |
4493 | ;; - UMAXV | |
4494 | ;; - UMINV | |
4495 | ;; ------------------------------------------------------------------------- | |
4496 | ||
4497 | ;; Unpredicated integer add reduction. | |
;; Operand 1 is the vector to reduce and operand 0 the scalar result.  The
;; C body fills in operand 2, the predicate referenced by match_dup, with
;; aarch64_ptrue_reg for the predicate mode.
4498 | (define_expand "reduc_plus_scal_<mode>" | |
4499 | [(set (match_operand:<VEL> 0 "register_operand") | |
4500 | (unspec:<VEL> [(match_dup 2) | |
4501 | (match_operand:SVE_I 1 "register_operand")] | |
4502 | UNSPEC_ADDV))] | |
43cacb12 RS |
4503 | "TARGET_SVE" |
4504 | { | |
16de3637 | 4505 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
43cacb12 RS |
4506 | } |
4507 | ) | |
4508 | ||
915d28fe RS |
4509 | ;; Predicated integer add reduction. The result is always 64 bits. |
;; Operand 1 is the governing predicate and operand 2 the vector to reduce.
;; The destination is always printed with the d modifier, independent of
;; the element mode.
4510 | (define_insn "*reduc_plus_scal_<mode>" | |
4511 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
4512 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
4513 | (match_operand:SVE_I 2 "register_operand" "w")] | |
4514 | UNSPEC_ADDV))] | |
43cacb12 | 4515 | "TARGET_SVE" |
915d28fe | 4516 | "uaddv\t%d0, %1, %2.<Vetype>" |
43cacb12 RS |
4517 | ) |
4518 | ||
b0760a40 | 4519 | ;; Unpredicated integer reductions. |
;; One expander is generated per member of the SVE_INT_REDUCTION iterator.
;; Operand 1 is the vector to reduce and operand 0 the scalar result.  The
;; C body fills in operand 2, the predicate referenced by match_dup, with
;; aarch64_ptrue_reg for the predicate mode.
915d28fe RS |
4520 | (define_expand "reduc_<optab>_scal_<mode>" |
4521 | [(set (match_operand:<VEL> 0 "register_operand") | |
4522 | (unspec:<VEL> [(match_dup 2) | |
4523 | (match_operand:SVE_I 1 "register_operand")] | |
b0760a40 | 4524 | SVE_INT_REDUCTION))] |
43cacb12 | 4525 | "TARGET_SVE" |
915d28fe RS |
4526 | { |
4527 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
4528 | } | |
43cacb12 RS |
4529 | ) |
4530 | ||
b0760a40 | 4531 | ;; Predicated integer reductions. |
;; Operand 1 is the governing predicate and operand 2 the vector to reduce.
;; The mnemonic is supplied by the sve_int_op attribute and the scalar
;; destination is printed with the element-size prefix of the input.
915d28fe RS |
4532 | (define_insn "*reduc_<optab>_scal_<mode>" |
4533 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
4534 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
4535 | (match_operand:SVE_I 2 "register_operand" "w")] | |
b0760a40 | 4536 | SVE_INT_REDUCTION))] |
43cacb12 | 4537 | "TARGET_SVE" |
b0760a40 | 4538 | "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>" |
43cacb12 RS |
4539 | ) |
4540 | ||
915d28fe RS |
4541 | ;; ------------------------------------------------------------------------- |
4542 | ;; ---- [FP] Tree reductions | |
4543 | ;; ------------------------------------------------------------------------- | |
4544 | ;; Includes: | |
4545 | ;; - FADDV | |
4546 | ;; - FMAXNMV | |
4547 | ;; - FMAXV | |
4548 | ;; - FMINNMV | |
4549 | ;; - FMINV | |
4550 | ;; ------------------------------------------------------------------------- | |
4551 | ||
b0760a40 RS |
4552 | ;; Unpredicated floating-point tree reductions. |
;; One expander is generated per member of the SVE_FP_REDUCTION iterator.
;; Operand 1 is the vector to reduce and operand 0 the scalar result.  The
;; C body fills in operand 2, the predicate referenced by match_dup, with
;; aarch64_ptrue_reg for the predicate mode.
4553 | (define_expand "reduc_<optab>_scal_<mode>" | |
915d28fe RS |
4554 | [(set (match_operand:<VEL> 0 "register_operand") |
4555 | (unspec:<VEL> [(match_dup 2) | |
4556 | (match_operand:SVE_F 1 "register_operand")] | |
b0760a40 | 4557 | SVE_FP_REDUCTION))] |
43cacb12 | 4558 | "TARGET_SVE" |
915d28fe RS |
4559 | { |
4560 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
4561 | } | |
43cacb12 RS |
4562 | ) |
4563 | ||
b0760a40 RS |
4564 | ;; Predicated floating-point tree reductions. |
;; Operand 1 is the governing predicate and operand 2 the vector to reduce.
;; The mnemonic is supplied by the sve_fp_op attribute and the scalar
;; destination is printed with the element-size prefix of the input.
4565 | (define_insn "*reduc_<optab>_scal_<mode>" | |
915d28fe RS |
4566 | [(set (match_operand:<VEL> 0 "register_operand" "=w") |
4567 | (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
4568 | (match_operand:SVE_F 2 "register_operand" "w")] | |
b0760a40 | 4569 | SVE_FP_REDUCTION))] |
43cacb12 | 4570 | "TARGET_SVE" |
b0760a40 | 4571 | "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>" |
43cacb12 RS |
4572 | ) |
4573 | ||
915d28fe RS |
4574 | ;; ------------------------------------------------------------------------- |
4575 | ;; ---- [FP] Left-to-right reductions | |
4576 | ;; ------------------------------------------------------------------------- | |
4577 | ;; Includes: | |
4578 | ;; - FADDA | |
4579 | ;; ------------------------------------------------------------------------- | |
4580 | ||
4581 | ;; Unpredicated in-order FP reductions. | |
;; Operand 1 is the scalar starting value, operand 2 the vector to
;; accumulate and operand 0 the scalar result.  The C body fills in
;; operand 3, the predicate referenced by match_dup, with aarch64_ptrue_reg
;; for the predicate mode.
4582 | (define_expand "fold_left_plus_<mode>" | |
4583 | [(set (match_operand:<VEL> 0 "register_operand") | |
4584 | (unspec:<VEL> [(match_dup 3) | |
4585 | (match_operand:<VEL> 1 "register_operand") | |
4586 | (match_operand:SVE_F 2 "register_operand")] | |
4587 | UNSPEC_FADDA))] | |
43cacb12 | 4588 | "TARGET_SVE" |
915d28fe RS |
4589 | { |
4590 | operands[3] = aarch64_ptrue_reg (<VPRED>mode); | |
4591 | } | |
43cacb12 RS |
4592 | ) |
4593 | ||
915d28fe RS |
4594 | ;; Predicated in-order FP reductions. |
;; Operand 3 is the governing predicate and operand 2 the vector to
;; accumulate.  Operand 1, the scalar starting value, is tied to the
;; destination, which is why the output template names register 0 twice.
4595 | (define_insn "mask_fold_left_plus_<mode>" | |
4596 | [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
4597 | (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl") | |
4598 | (match_operand:<VEL> 1 "register_operand" "0") | |
4599 | (match_operand:SVE_F 2 "register_operand" "w")] | |
4600 | UNSPEC_FADDA))] | |
43cacb12 | 4601 | "TARGET_SVE" |
915d28fe | 4602 | "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>" |
43cacb12 RS |
4603 | ) |
4604 | ||
915d28fe RS |
4605 | ;; ========================================================================= |
4606 | ;; == Permutes | |
4607 | ;; ========================================================================= | |
4608 | ||
4609 | ;; ------------------------------------------------------------------------- | |
4610 | ;; ---- [INT,FP] General permutes | |
4611 | ;; ------------------------------------------------------------------------- | |
4612 | ;; Includes: | |
4613 | ;; - TBL | |
4614 | ;; ------------------------------------------------------------------------- | |
4615 | ||
;; General permute of operands 1 and 2 into operand 0, controlled by the
;; integer selector vector in operand 3.  The expander is only enabled when
;; the number of units in the vector mode is a compile-time constant.  All
;; of the work is done by aarch64_expand_sve_vec_perm.
4616 | (define_expand "vec_perm<mode>" | |
4617 | [(match_operand:SVE_ALL 0 "register_operand") | |
4618 | (match_operand:SVE_ALL 1 "register_operand") | |
4619 | (match_operand:SVE_ALL 2 "register_operand") | |
4620 | (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] | |
4621 | "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" | |
9bfb28ed | 4622 | { |
915d28fe RS |
4623 | aarch64_expand_sve_vec_perm (operands[0], operands[1], |
4624 | operands[2], operands[3]); | |
9bfb28ed RS |
4625 | DONE; |
4626 | } | |
4627 | ) | |
4628 | ||
915d28fe RS |
;; TBL with a single source vector.  Operand 1 has the same mode as the
;; result; operand 2 is a vector of the equivalent integer mode.
;; NOTE(review): operand 2 is presumably the per-element index vector, as
;; in the architectural TBL; confirm against the users of UNSPEC_TBL.
4629 | (define_insn "*aarch64_sve_tbl<mode>" |
4630 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
4631 | (unspec:SVE_ALL | |
4632 | [(match_operand:SVE_ALL 1 "register_operand" "w") | |
4633 | (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] | |
4634 | UNSPEC_TBL))] | |
43cacb12 | 4635 | "TARGET_SVE" |
915d28fe | 4636 | "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
43cacb12 RS |
4637 | ) |
4638 | ||
915d28fe RS |
4639 | ;; ------------------------------------------------------------------------- |
4640 | ;; ---- [INT,FP] Special-purpose unary permutes | |
4641 | ;; ------------------------------------------------------------------------- | |
4642 | ;; Includes: | |
4643 | ;; - DUP | |
4644 | ;; - REV | |
915d28fe RS |
4645 | ;; ------------------------------------------------------------------------- |
4646 | ||
4647 | ;; Duplicate one element of a vector. | |
;; Operand 2 is the constant lane index within operand 1.  The insn only
;; matches when the byte offset of that lane (index times element size)
;; lies in the range 0 to 63 inclusive.
4648 | (define_insn "*aarch64_sve_dup_lane<mode>" | |
4649 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
4650 | (vec_duplicate:SVE_ALL | |
4651 | (vec_select:<VEL> | |
4652 | (match_operand:SVE_ALL 1 "register_operand" "w") | |
4653 | (parallel [(match_operand:SI 2 "const_int_operand")]))))] | |
4654 | "TARGET_SVE | |
4655 | && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)" | |
4656 | "dup\t%0.<Vetype>, %1.<Vetype>[%2]" | |
4657 | ) | |
4658 | ||
4659 | ;; Reverse the order of elements within a full vector. | |
;; Operand 1 is the source vector and operand 0 the reversed result.  The
;; operation is represented as UNSPEC_REV and is not predicated.
4660 | (define_insn "@aarch64_sve_rev<mode>" | |
4661 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
4662 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] | |
4663 | UNSPEC_REV))] | |
9bfb28ed | 4664 | "TARGET_SVE" |
915d28fe RS |
4665 | "rev\t%0.<Vetype>, %1.<Vetype>") |
4666 | ||
915d28fe RS |
4667 | ;; ------------------------------------------------------------------------- |
4668 | ;; ---- [INT,FP] Special-purpose binary permutes | |
4669 | ;; ------------------------------------------------------------------------- | |
4670 | ;; Includes: | |
4671 | ;; - TRN1 | |
4672 | ;; - TRN2 | |
4673 | ;; - UZP1 | |
4674 | ;; - UZP2 | |
4675 | ;; - ZIP1 | |
4676 | ;; - ZIP2 | |
4677 | ;; ------------------------------------------------------------------------- | |
4678 | ||
4679 | ;; Permutes that take half the elements from one vector and half the | |
4680 | ;; elements from the other. | |
;; One insn is generated per member of the PERMUTE iterator, with the
;; mnemonic supplied by the perm_insn attribute.  Operands 1 and 2 are the
;; two source vectors.
3e2751ce | 4681 | (define_insn "aarch64_sve_<perm_insn><mode>" |
915d28fe RS |
4682 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w") |
4683 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") | |
4684 | (match_operand:SVE_ALL 2 "register_operand" "w")] | |
4685 | PERMUTE))] | |
9bfb28ed | 4686 | "TARGET_SVE" |
3e2751ce | 4687 | "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
915d28fe RS |
4688 | ) |
4689 | ||
4690 | ;; Concatenate two vectors and extract a subvector. Note that the | |
4691 | ;; immediate (third) operand is the lane index, not the byte index. | |
;; The insn only matches when the lane index scaled by the element size
;; lies in the range 0 to 255 inclusive.  The C body performs that scaling
;; on operand 3 before printing, so the emitted immediate is a byte count.
;; In the first alternative operand 1 is tied to the destination.  In the
;; second the destination is a separate earlyclobber register and a
;; MOVPRFX first copies operand 1 into it.
4692 | (define_insn "*aarch64_sve_ext<mode>" | |
06b3ba23 RS |
4693 | [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w") |
4694 | (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0, w") | |
4695 | (match_operand:SVE_ALL 2 "register_operand" "w, w") | |
915d28fe RS |
4696 | (match_operand:SI 3 "const_int_operand")] |
4697 | UNSPEC_EXT))] | |
4698 | "TARGET_SVE | |
4699 | && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)" | |
9bfb28ed | 4700 | { |
915d28fe | 4701 | operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode)); |
06b3ba23 RS |
4702 | return (which_alternative == 0 |
4703 | ? "ext\\t%0.b, %0.b, %2.b, #%3" | |
4704 | : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3"); | |
43cacb12 | 4705 | } |
06b3ba23 | 4706 | [(set_attr "movprfx" "*,yes")] |
43cacb12 RS |
4707 | ) |
4708 | ||
915d28fe RS |
4709 | ;; ------------------------------------------------------------------------- |
4710 | ;; ---- [PRED] Special-purpose binary permutes | |
4711 | ;; ------------------------------------------------------------------------- | |
4712 | ;; Includes: | |
4713 | ;; - TRN1 | |
4714 | ;; - TRN2 | |
4715 | ;; - UZP1 | |
4716 | ;; - UZP2 | |
4717 | ;; - ZIP1 | |
4718 | ;; - ZIP2 | |
4719 | ;; ------------------------------------------------------------------------- | |
4720 | ||
4721 | ;; Permutes that take half the elements from one vector and half the | |
4722 | ;; elements from the other. | |
;; This is the predicate form: all three operands have a PRED_ALL mode and
;; live in predicate registers.  One insn is generated per member of the
;; PERMUTE iterator, with the mnemonic supplied by the perm_insn attribute.
2803bc3b | 4723 | (define_insn "@aarch64_sve_<perm_insn><mode>" |
915d28fe RS |
4724 | [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") |
4725 | (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
4726 | (match_operand:PRED_ALL 2 "register_operand" "Upa")] | |
4727 | PERMUTE))] | |
43cacb12 | 4728 | "TARGET_SVE" |
3e2751ce | 4729 | "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
43cacb12 RS |
4730 | ) |
4731 | ||
915d28fe RS |
4732 | ;; ========================================================================= |
4733 | ;; == Conversions | |
4734 | ;; ========================================================================= | |
4735 | ||
4736 | ;; ------------------------------------------------------------------------- | |
4737 | ;; ---- [INT<-INT] Packs | |
4738 | ;; ------------------------------------------------------------------------- | |
4739 | ;; Includes: | |
4740 | ;; - UZP1 | |
4741 | ;; ------------------------------------------------------------------------- | |
4742 | ||
43cacb12 RS |
4743 | ;; Integer pack. Use UZP1 on the narrower type, which discards |
4744 | ;; the high part of each wide element. | |
;; Operands 1 and 2 have the wide vector mode and operand 0 the narrow one.
;; All three registers are printed with the narrow element suffix, so the
;; wide inputs are read as vectors of narrow elements.
4745 | (define_insn "vec_pack_trunc_<Vwide>" | |
4746 | [(set (match_operand:SVE_BHSI 0 "register_operand" "=w") | |
4747 | (unspec:SVE_BHSI | |
4748 | [(match_operand:<VWIDE> 1 "register_operand" "w") | |
4749 | (match_operand:<VWIDE> 2 "register_operand" "w")] | |
4750 | UNSPEC_PACK))] | |
4751 | "TARGET_SVE" | |
4752 | "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
4753 | ) | |
4754 | ||
915d28fe RS |
4755 | ;; ------------------------------------------------------------------------- |
4756 | ;; ---- [INT<-INT] Unpacks | |
4757 | ;; ------------------------------------------------------------------------- | |
4758 | ;; Includes: | |
4759 | ;; - SUNPKHI | |
4760 | ;; - SUNPKLO | |
4761 | ;; - UUNPKHI | |
4762 | ;; - UUNPKLO | |
4763 | ;; ------------------------------------------------------------------------- | |
4764 | ||
4765 | ;; Unpack the low or high half of a vector, where "high" refers to | |
4766 | ;; the low-numbered lanes for big-endian and the high-numbered lanes | |
4767 | ;; for little-endian. | |
;; Operand 1 is the narrow source vector and operand 0 the wide result.
;; The C body chooses between the unpkhi and unpklo generators according to
;; the hi_lanes_optab attribute, emits the chosen insn and finishes with
;; DONE.
4768 | (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>" | |
4769 | [(match_operand:<VWIDE> 0 "register_operand") | |
4770 | (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)] | |
43cacb12 RS |
4771 | "TARGET_SVE" |
4772 | { | |
915d28fe RS |
4773 | emit_insn ((<hi_lanes_optab> |
4774 | ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode> | |
4775 | : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>) | |
4776 | (operands[0], operands[1])); | |
4777 | DONE; | |
4778 | } | |
4779 | ) | |
4780 | ||
;; The unpack instructions themselves, one per member of the UNPACK
;; iterator.  Operand 1 is the narrow source vector and operand 0 the wide
;; result; the destination is printed with the Vewtype suffix and the
;; source with the Vetype suffix.
4781 | (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>" | |
4782 | [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
4783 | (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")] | |
4784 | UNPACK))] | |
4785 | "TARGET_SVE" | |
4786 | "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>" | |
4787 | ) | |
4788 | ||
4789 | ;; ------------------------------------------------------------------------- | |
4790 | ;; ---- [INT<-FP] Conversions | |
4791 | ;; ------------------------------------------------------------------------- | |
4792 | ;; Includes: | |
4793 | ;; - FCVTZS | |
4794 | ;; - FCVTZU | |
4795 | ;; ------------------------------------------------------------------------- | |
4796 | ||
4797 | ;; Unpredicated conversion of floats to integers of the same size (HF to HI, | |
4798 | ;; SF to SI or DF to DI). | |
;; Operand 1 is the floating-point source and operand 0 the integer result.
;; The C body fills in operand 2, the predicate referenced by match_dup,
;; with aarch64_ptrue_reg for the predicate mode.  The strictness operand
;; is fixed at SVE_RELAXED_GP.
99361551 | 4799 | (define_expand "<optab><mode><v_int_equiv>2" |
915d28fe RS |
4800 | [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") |
4801 | (unspec:<V_INT_EQUIV> | |
4802 | [(match_dup 2) | |
99361551 RS |
4803 | (const_int SVE_RELAXED_GP) |
4804 | (match_operand:SVE_F 1 "register_operand")] | |
4805 | SVE_COND_FCVTI))] | |
915d28fe RS |
4806 | "TARGET_SVE" |
4807 | { | |
4808 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); | |
43cacb12 RS |
4809 | } |
4810 | ) | |
4811 | ||
95eb5537 RS |
4812 | ;; Predicated float-to-integer conversion, either to the same width or wider. |
;; Operand 1 is the predicate (in the predicate mode of the integer
;; result, <SVE_HSDI:VPRED>), operand 2 the FP input and operand 3 the
;; strictness value accepted by aarch64_sve_gp_strictness.  Operand 3 is
;; matched but does not appear in the output template.  The insn
;; condition limits the mode pairs to those in which the integer
;; elements are at least as wide as the FP elements.
4813 | (define_insn "*aarch64_sve_<optab>_nontrunc<SVE_F:mode><SVE_HSDI:mode>" | |
915d28fe RS |
4814 | [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") |
4815 | (unspec:SVE_HSDI | |
95eb5537 | 4816 | [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl") |
99361551 | 4817 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
95eb5537 | 4818 | (match_operand:SVE_F 2 "register_operand" "w")] |
99361551 | 4819 | SVE_COND_FCVTI))] |
95eb5537 RS |
4820 | "TARGET_SVE && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits>" |
4821 | "fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype>" | |
915d28fe RS |
4822 | ) |
4823 | ||
95eb5537 RS |
4824 | ;; Predicated narrowing float-to-integer conversion. |
;; Only the DF-to-SI combination is covered (VNx2DF_ONLY input,
;; VNx4SI_ONLY result).  The predicate, operand 1, has mode VNx2BI,
;; the same mode that the [INT<-FP] pack expander below creates for its
;; VNx2DF inputs.  Operand 3, the strictness value, is matched but is
;; not used by the output template.
4825 | (define_insn "*aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" | |
4826 | [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w") | |
4827 | (unspec:VNx4SI_ONLY | |
915d28fe | 4828 | [(match_operand:VNx2BI 1 "register_operand" "Upl") |
99361551 | 4829 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
95eb5537 | 4830 | (match_operand:VNx2DF_ONLY 2 "register_operand" "w")] |
99361551 | 4831 | SVE_COND_FCVTI))] |
915d28fe | 4832 | "TARGET_SVE" |
95eb5537 | 4833 | "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>" |
915d28fe RS |
4834 | ) |
4835 | ||
c5e16983 RS |
4836 | ;; Predicated float-to-integer conversion with merging, either to the same |
4837 | ;; width or wider. | |
4838 | ;; | |
4839 | ;; The first alternative doesn't need the earlyclobber, but the only case | |
4840 | ;; it would help is the uninteresting one in which operands 2 and 3 are | |
4841 | ;; the same register (despite having different modes). Making all the | |
4842 | ;; alternatives earlyclobber makes things more consistent for the | |
4843 | ;; register allocator. | |
;;
;; Operand 3 is the third input of the UNSPEC_SEL, i.e. the fallback
;; value that is merged with the conversion result.  The three
;; alternatives differ only in how that value is provided:
;;   1. "0":  operand 3 is tied to the output, so a bare FCVTZ<su> with
;;      merging predication (%1/m) is printed.
;;   2. "Dz": operand 3 is zero; a zeroing MOVPRFX (%1/z) from operand 2
;;      precedes the conversion.
;;   3. "w":  operand 3 is some other register; an unpredicated MOVPRFX
;;      from %3 precedes the conversion.
;; The "movprfx" attribute is "yes" for alternatives 2 and 3 only.
;;
;; Operand 4, the predicate slot of the inner conversion, has no mode or
;; predicate of its own; the insn condition passes it to
;; aarch64_sve_pred_dominates_p together with operand 1.  When operands
;; 1 and 4 are not rtx_equal_p, the rewrite step replaces operand 4
;; with a copy of operand 1.  Operand 5 is the strictness value and is
;; not used by the output templates.
4844 | (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_F:mode><SVE_HSDI:mode>" | |
4845 | [(set (match_operand:SVE_HSDI 0 "register_operand" "=&w, &w, ?&w") | |
4846 | (unspec:SVE_HSDI | |
4847 | [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
4848 | (unspec:SVE_HSDI | |
4849 | [(match_operand 4) | |
4850 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
4851 | (match_operand:SVE_F 2 "register_operand" "w, w, w")] | |
4852 | SVE_COND_FCVTI) | |
4853 | (match_operand:SVE_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
4854 | UNSPEC_SEL))] | |
4855 | "TARGET_SVE | |
4856 | && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits> | |
4857 | && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" | |
4858 | "@ | |
4859 | fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype> | |
4860 | movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype> | |
4861 | movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype>" | |
4862 | "&& !rtx_equal_p (operands[1], operands[4])" | |
4863 | { | |
4864 | operands[4] = copy_rtx (operands[1]); | |
4865 | } | |
4866 | [(set_attr "movprfx" "*,yes,yes")] | |
4867 | ) | |
4868 | ||
915d28fe RS |
4869 | ;; ------------------------------------------------------------------------- |
4870 | ;; ---- [INT<-FP] Packs | |
4871 | ;; ------------------------------------------------------------------------- | |
4872 | ;; The patterns in this section are synthetic. | |
4873 | ;; ------------------------------------------------------------------------- | |
4874 | ||
43cacb12 RS |
4875 | ;; Convert two vectors of DF to SI and pack the results into a single vector. |
;; Operands 3, 4 and 5 are internal.  The preparation code sets operand 3
;; to aarch64_ptrue_reg (VNx2BImode) and creates operands 4 and 5 as new
;; VNx4SI pseudo registers.  The template then emits three sets: each
;; VNx2DF input (operands 1 and 2) is converted into one temporary with
;; SVE_RELAXED_GP strictness, and UNSPEC_UZP1 combines the two
;; temporaries into operand 0.
4876 | (define_expand "vec_pack_<su>fix_trunc_vnx2df" | |
4877 | [(set (match_dup 4) | |
4878 | (unspec:VNx4SI | |
4879 | [(match_dup 3) | |
99361551 RS |
4880 | (const_int SVE_RELAXED_GP) |
4881 | (match_operand:VNx2DF 1 "register_operand")] | |
4882 | SVE_COND_FCVTI)) | |
43cacb12 RS |
4883 | (set (match_dup 5) |
4884 | (unspec:VNx4SI | |
4885 | [(match_dup 3) | |
99361551 RS |
4886 | (const_int SVE_RELAXED_GP) |
4887 | (match_operand:VNx2DF 2 "register_operand")] | |
4888 | SVE_COND_FCVTI)) | |
43cacb12 RS |
4889 | (set (match_operand:VNx4SI 0 "register_operand") |
4890 | (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
4891 | "TARGET_SVE" | |
4892 | { | |
16de3637 | 4893 | operands[3] = aarch64_ptrue_reg (VNx2BImode); |
43cacb12 RS |
4894 | operands[4] = gen_reg_rtx (VNx4SImode); |
4895 | operands[5] = gen_reg_rtx (VNx4SImode); | |
4896 | } | |
4897 | ) | |
f1739b48 | 4898 | |
915d28fe RS |
4899 | ;; ------------------------------------------------------------------------- |
4900 | ;; ---- [INT<-FP] Unpacks | |
4901 | ;; ------------------------------------------------------------------------- | |
4902 | ;; No patterns here yet! | |
4903 | ;; ------------------------------------------------------------------------- | |
9d4ac06e | 4904 | |
915d28fe RS |
4905 | ;; ------------------------------------------------------------------------- |
4906 | ;; ---- [FP<-INT] Conversions | |
4907 | ;; ------------------------------------------------------------------------- | |
4908 | ;; Includes: | |
4909 | ;; - SCVTF | |
4910 | ;; - UCVTF | |
4911 | ;; ------------------------------------------------------------------------- | |
a08acce8 | 4912 | |
915d28fe RS |
4913 | ;; Unpredicated conversion of integers to floats of the same size |
4914 | ;; (HI to HF, SI to SF or DI to DF). | |
;; Only operands 0 and 1 come from the caller.  The preparation code
;; fills in operand 2, the predicate slot of the unspec, with
;; aarch64_ptrue_reg (<VPRED>mode), and the strictness argument is fixed
;; at SVE_RELAXED_GP.  The body has no DONE, so the template itself is
;; emitted.
4915 | (define_expand "<optab><v_int_equiv><mode>2" | |
4916 | [(set (match_operand:SVE_F 0 "register_operand") | |
a08acce8 | 4917 | (unspec:SVE_F |
915d28fe | 4918 | [(match_dup 2) |
99361551 RS |
4919 | (const_int SVE_RELAXED_GP) |
4920 | (match_operand:<V_INT_EQUIV> 1 "register_operand")] | |
4921 | SVE_COND_ICVTF))] | |
a08acce8 | 4922 | "TARGET_SVE" |
f4fde1b3 | 4923 | { |
915d28fe | 4924 | operands[2] = aarch64_ptrue_reg (<VPRED>mode); |
f4fde1b3 | 4925 | } |
b41d1f6e RS |
4926 | ) |
4927 | ||
95eb5537 RS |
4928 | ;; Predicated integer-to-float conversion, either to the same width or |
4929 | ;; narrower. | |
;; Operand 1 is the predicate, whose mode is derived from the integer
;; input mode (<SVE_HSDI:VPRED>); operand 2 is the integer input and
;; operand 3 the strictness value, which the output template does not
;; use.  The insn condition accepts only mode pairs in which the integer
;; elements are at least as wide as the FP elements.
4930 | (define_insn "*aarch64_sve_<optab>_nonextend<SVE_HSDI:mode><SVE_F:mode>" | |
4931 | [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
4932 | (unspec:SVE_F | |
4933 | [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl") | |
99361551 RS |
4934 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
4935 | (match_operand:SVE_HSDI 2 "register_operand" "w")] | |
4936 | SVE_COND_ICVTF))] | |
95eb5537 RS |
4937 | "TARGET_SVE && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits>" |
4938 | "<su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>" | |
f1739b48 | 4939 | ) |
6c9c7b73 | 4940 | |
95eb5537 RS |
4941 | ;; Predicated widening integer-to-float conversion. |
;; This pattern is named (no leading "*"): the [FP<-INT] unpack expander
;; later in this file calls its generator with the arguments
;; (result, predicate, input, strictness), i.e. operands 0, 1, 2 and 3.
;; The predicate has mode VNx2BI, and the strictness operand is not used
;; by the output template.
4942 | (define_insn "aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" | |
4943 | [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w") | |
4944 | (unspec:VNx2DF_ONLY | |
915d28fe | 4945 | [(match_operand:VNx2BI 1 "register_operand" "Upl") |
99361551 | 4946 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
95eb5537 | 4947 | (match_operand:VNx4SI_ONLY 2 "register_operand" "w")] |
99361551 | 4948 | SVE_COND_ICVTF))] |
6c9c7b73 | 4949 | "TARGET_SVE" |
95eb5537 | 4950 | "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>" |
915d28fe | 4951 | ) |
6c9c7b73 | 4952 | |
c5e16983 RS |
4953 | ;; Predicated integer-to-float conversion with merging, either to the same |
4954 | ;; width or narrower. | |
4955 | ;; | |
4956 | ;; The first alternative doesn't need the earlyclobber, but the only case | |
4957 | ;; it would help is the uninteresting one in which operands 2 and 3 are | |
4958 | ;; the same register (despite having different modes). Making all the | |
4959 | ;; alternatives earlyclobber makes things more consistent for the | |
4960 | ;; register allocator. | |
;;
;; Operand 3 is the third input of the UNSPEC_SEL, i.e. the fallback
;; value that is merged with the conversion result.  The three
;; alternatives differ only in how that value is provided:
;;   1. "0":  operand 3 is tied to the output, so a bare <su>CVTF with
;;      merging predication (%1/m) is printed.
;;   2. "Dz": operand 3 is zero; a zeroing MOVPRFX (%1/z) from operand 2
;;      precedes the conversion.  The MOVPRFX is printed with the
;;      integer element suffix (<SVE_HSDI:Vetype>).
;;   3. "w":  operand 3 is some other register; an unpredicated MOVPRFX
;;      from %3 precedes the conversion.
;; The "movprfx" attribute is "yes" for alternatives 2 and 3 only.
;;
;; Operand 4, the predicate slot of the inner conversion, has no mode or
;; predicate of its own; the insn condition passes it to
;; aarch64_sve_pred_dominates_p together with operand 1.  When operands
;; 1 and 4 are not rtx_equal_p, the rewrite step replaces operand 4
;; with a copy of operand 1.  Operand 5 is the strictness value and is
;; not used by the output templates.
4961 | (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_HSDI:mode><SVE_F:mode>" | |
4962 | [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w") | |
4963 | (unspec:SVE_F | |
4964 | [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
4965 | (unspec:SVE_F | |
4966 | [(match_operand 4) | |
4967 | (match_operand:SI 5 "aarch64_sve_gp_strictness") | |
4968 | (match_operand:SVE_HSDI 2 "register_operand" "w, w, w")] | |
4969 | SVE_COND_ICVTF) | |
4970 | (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] | |
4971 | UNSPEC_SEL))] | |
4972 | "TARGET_SVE | |
4973 | && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits> | |
4974 | && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" | |
4975 | "@ | |
4976 | <su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> | |
4977 | movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> | |
4978 | movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>" | |
4979 | "&& !rtx_equal_p (operands[1], operands[4])" | |
4980 | { | |
4981 | operands[4] = copy_rtx (operands[1]); | |
4982 | } | |
4983 | [(set_attr "movprfx" "*,yes,yes")] | |
4984 | ) | |
4985 | ||
915d28fe RS |
4986 | ;; ------------------------------------------------------------------------- |
4987 | ;; ---- [FP<-INT] Packs | |
4988 | ;; ------------------------------------------------------------------------- | |
4989 | ;; No patterns here yet! | |
4990 | ;; ------------------------------------------------------------------------- | |
6c9c7b73 | 4991 | |
915d28fe RS |
4992 | ;; ------------------------------------------------------------------------- |
4993 | ;; ---- [FP<-INT] Unpacks | |
4994 | ;; ------------------------------------------------------------------------- | |
4995 | ;; The patterns in this section are synthetic. | |
4996 | ;; ------------------------------------------------------------------------- | |
4997 | ||
4998 | ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI | |
4999 | ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the | |
5000 | ;; unpacked VNx4SI to VNx2DF. | |
;;
;; The C body emits everything itself and ends with DONE:
;;   - ZIP2 (when <hi_lanes_optab> is nonzero) or ZIP1 of operand 1 with
;;     itself into a new VNx4SI temporary;
;;   - the SI-to-DF extending conversion of that temporary into
;;     operand 0, using aarch64_ptrue_reg (VNx2BImode) as the predicate
;;     and SVE_RELAXED_GP as the strictness.
5001 | (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si" | |
5002 | [(match_operand:VNx2DF 0 "register_operand") | |
5003 | (FLOATUORS:VNx2DF | |
5004 | (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] | |
5005 | UNPACK_UNSIGNED))] | |
5006 | "TARGET_SVE" | |
5007 | { | |
5008 | /* Use ZIP to do the unpack, since we don't care about the upper halves | |
5009 | and since it has the nice property of not needing any subregs. | |
5010 | If using UUNPK* turns out to be preferable, we could model it as | |
5011 | a ZIP whose first operand is zero. */ | |
5012 | rtx temp = gen_reg_rtx (VNx4SImode); | |
5013 | emit_insn ((<hi_lanes_optab> | |
5014 | ? gen_aarch64_sve_zip2vnx4si | |
5015 | : gen_aarch64_sve_zip1vnx4si) | |
5016 | (temp, operands[1], operands[1])); | |
5017 | rtx ptrue = aarch64_ptrue_reg (VNx2BImode); | |
99361551 | 5018 | rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); |
95eb5537 | 5019 | emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df |
99361551 | 5020 | (operands[0], ptrue, temp, strictness)); |
6c9c7b73 AM |
5021 | DONE; |
5022 | } | |
5023 | ) | |
5024 | ||
915d28fe RS |
5025 | ;; ------------------------------------------------------------------------- |
5026 | ;; ---- [FP<-FP] Packs | |
5027 | ;; ------------------------------------------------------------------------- | |
5028 | ;; Includes: | |
5029 | ;; - FCVT | |
5030 | ;; ------------------------------------------------------------------------- | |
5031 | ||
5032 | ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack | |
5033 | ;; the results into a single vector. | |
;; Operands 3, 4 and 5 are internal: the preparation code sets operand 3
;; to aarch64_ptrue_reg (<VWIDE_PRED>mode) and creates operands 4 and 5
;; as new pseudo registers in the narrow result mode (<MODE>mode).  Each
;; wide input (operands 1 and 2) is converted into one of the
;; temporaries with UNSPEC_COND_FCVT and SVE_RELAXED_GP strictness, and
;; UNSPEC_UZP1 then combines the two temporaries into operand 0.
5034 | (define_expand "vec_pack_trunc_<Vwide>" | |
5035 | [(set (match_dup 4) | |
5036 | (unspec:SVE_HSF | |
5037 | [(match_dup 3) | |
99361551 RS |
5038 | (const_int SVE_RELAXED_GP) |
5039 | (match_operand:<VWIDE> 1 "register_operand")] | |
5040 | UNSPEC_COND_FCVT)) | |
915d28fe RS |
5041 | (set (match_dup 5) |
5042 | (unspec:SVE_HSF | |
5043 | [(match_dup 3) | |
99361551 RS |
5044 | (const_int SVE_RELAXED_GP) |
5045 | (match_operand:<VWIDE> 2 "register_operand")] | |
5046 | UNSPEC_COND_FCVT)) | |
915d28fe RS |
5047 | (set (match_operand:SVE_HSF 0 "register_operand") |
5048 | (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
6c9c7b73 AM |
5049 | "TARGET_SVE" |
5050 | { | |
915d28fe RS |
5051 | operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode); |
5052 | operands[4] = gen_reg_rtx (<MODE>mode); | |
5053 | operands[5] = gen_reg_rtx (<MODE>mode); | |
6c9c7b73 AM |
5054 | } |
5055 | ) | |
9feeafd7 | 5056 | |
95eb5537 RS |
5057 | ;; Predicated float-to-float truncation. |
;; Operand 1 is the predicate, in the predicate mode of the wider FP
;; input (<SVE_SDF:VPRED>); operand 2 is that input and operand 3 the
;; strictness value, which is not used by the output template.  The insn
;; condition requires the input elements to be strictly wider than the
;; result elements.
5058 | (define_insn "*aarch64_sve_<optab>_trunc<SVE_SDF:mode><SVE_HSF:mode>" | |
915d28fe RS |
5059 | [(set (match_operand:SVE_HSF 0 "register_operand" "=w") |
5060 | (unspec:SVE_HSF | |
95eb5537 | 5061 | [(match_operand:<SVE_SDF:VPRED> 1 "register_operand" "Upl") |
99361551 | 5062 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
95eb5537 RS |
5063 | (match_operand:SVE_SDF 2 "register_operand" "w")] |
5064 | SVE_COND_FCVT))] | |
5065 | "TARGET_SVE && <SVE_SDF:elem_bits> > <SVE_HSF:elem_bits>" | |
5066 | "fcvt\t%0.<SVE_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>" | |
9feeafd7 | 5067 | ) |
a9fad8fe | 5068 | |
915d28fe RS |
5069 | ;; ------------------------------------------------------------------------- |
5070 | ;; ---- [FP<-FP] Unpacks | |
5071 | ;; ------------------------------------------------------------------------- | |
5072 | ;; Includes: | |
5073 | ;; - FCVT | |
5074 | ;; ------------------------------------------------------------------------- | |
5075 | ||
5076 | ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. | |
5077 | ;; First unpack the source without conversion, then float-convert the | |
5078 | ;; unpacked source. | |
;;
;; The C body emits everything itself and ends with DONE:
;;   - ZIP2 (when <hi_lanes_optab> is nonzero) or ZIP1 of operand 1 with
;;     itself into a new temporary of the input mode (<MODE>mode);
;;   - the extending FCVT of that temporary into operand 0, using
;;     aarch64_ptrue_reg (<VWIDE_PRED>mode) as the predicate and
;;     SVE_RELAXED_GP as the strictness.
5079 | (define_expand "vec_unpacks_<perm_hilo>_<mode>" | |
5080 | [(match_operand:<VWIDE> 0 "register_operand") | |
5081 | (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")] | |
5082 | UNPACK_UNSIGNED)] | |
a9fad8fe AM |
5083 | "TARGET_SVE" |
5084 | { | |
915d28fe RS |
5085 | /* Use ZIP to do the unpack, since we don't care about the upper halves |
5086 | and since it has the nice property of not needing any subregs. | |
5087 | If using UUNPK* turns out to be preferable, we could model it as | |
5088 | a ZIP whose first operand is zero. */ | |
5089 | rtx temp = gen_reg_rtx (<MODE>mode); | |
5090 | emit_insn ((<hi_lanes_optab> | |
5091 | ? gen_aarch64_sve_zip2<mode> | |
5092 | : gen_aarch64_sve_zip1<mode>) | |
5093 | (temp, operands[1], operands[1])); | |
5094 | rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode); | |
99361551 | 5095 | rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); |
95eb5537 | 5096 | emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide> |
99361551 | 5097 | (operands[0], ptrue, temp, strictness)); |
a9fad8fe AM |
5098 | DONE; |
5099 | } | |
5100 | ) | |
5101 | ||
95eb5537 RS |
5102 | ;; Predicated float-to-float extension. |
;; This pattern is named (no leading "*"): the [FP<-FP] unpack expander
;; above calls its generator as gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
;; with the arguments (result, predicate, input, strictness).  The
;; predicate mode comes from the wider result mode (<SVE_SDF:VPRED>),
;; the strictness operand is not used by the output template, and the
;; insn condition requires the result elements to be strictly wider than
;; the input elements.
5103 | (define_insn "aarch64_sve_<optab>_nontrunc<SVE_HSF:mode><SVE_SDF:mode>" | |
5104 | [(set (match_operand:SVE_SDF 0 "register_operand" "=w") | |
5105 | (unspec:SVE_SDF | |
5106 | [(match_operand:<SVE_SDF:VPRED> 1 "register_operand" "Upl") | |
99361551 RS |
5107 | (match_operand:SI 3 "aarch64_sve_gp_strictness") |
5108 | (match_operand:SVE_HSF 2 "register_operand" "w")] | |
95eb5537 RS |
5109 | SVE_COND_FCVT))] |
5110 | "TARGET_SVE && <SVE_SDF:elem_bits> > <SVE_HSF:elem_bits>" | |
5111 | "fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_HSF:Vetype>" | |
a9fad8fe AM |
5112 | ) |
5113 | ||
915d28fe RS |
5114 | ;; ------------------------------------------------------------------------- |
5115 | ;; ---- [PRED<-PRED] Packs | |
5116 | ;; ------------------------------------------------------------------------- | |
5117 | ;; Includes: | |
5118 | ;; - UZP1 | |
5119 | ;; ------------------------------------------------------------------------- | |
a9fad8fe | 5120 | |
915d28fe RS |
5121 | ;; Predicate pack. Use UZP1 on the narrower type, which discards |
5122 | ;; the high part of each wide element. | |
;; Operands 1 and 2 are the two <VWIDE> predicate inputs and operand 0 is
;; the PRED_BHS result; all three use the "Upa" constraint.  Every
;; operand in the output template is printed with the same <Vetype>
;; suffix.
5123 | (define_insn "vec_pack_trunc_<Vwide>" | |
5124 | [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") | |
5125 | (unspec:PRED_BHS | |
5126 | [(match_operand:<VWIDE> 1 "register_operand" "Upa") | |
5127 | (match_operand:<VWIDE> 2 "register_operand" "Upa")] | |
5128 | UNSPEC_PACK))] | |
a9fad8fe | 5129 | "TARGET_SVE" |
915d28fe | 5130 | "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" |
a9fad8fe | 5131 | ) |
3a0afad0 | 5132 | |
915d28fe RS |
5133 | ;; ------------------------------------------------------------------------- |
5134 | ;; ---- [PRED<-PRED] Unpacks | |
5135 | ;; ------------------------------------------------------------------------- | |
5136 | ;; Includes: | |
5137 | ;; - PUNPKHI | |
5138 | ;; - PUNPKLO | |
5139 | ;; ------------------------------------------------------------------------- | |
5140 | ||
5141 | ;; Unpack the low or high half of a predicate, where "high" refers to | |
5142 | ;; the low-numbered lanes for big-endian and the high-numbered lanes | |
5143 | ;; for little-endian. | |
;;
;; The pattern name includes <su>, but the generator names chosen in the
;; C body do not: whichever <su> variant is requested, the body emits
;; PUNPKHI when <hi_lanes_optab> is nonzero and PUNPKLO otherwise, then
;; ends with DONE.
5144 | (define_expand "vec_unpack<su>_<perm_hilo>_<mode>" | |
5145 | [(match_operand:<VWIDE> 0 "register_operand") | |
5146 | (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")] | |
5147 | UNPACK)] | |
3a0afad0 PK |
5148 | "TARGET_SVE" |
5149 | { | |
915d28fe RS |
5150 | emit_insn ((<hi_lanes_optab> |
5151 | ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode> | |
5152 | : gen_aarch64_sve_punpklo_<PRED_BHS:mode>) | |
5153 | (operands[0], operands[1])); | |
3a0afad0 PK |
5154 | DONE; |
5155 | } | |
5156 | ) | |
915d28fe RS |
5157 | |
;; The instruction emitted by the predicate unpack expander above.
;; Operand 0 is the <VWIDE> result and operand 1 the PRED_BHS input,
;; both with the "Upa" constraint.  The pattern is defined over
;; UNPACK_UNSIGNED only, and the element suffixes in the output template
;; are fixed at .h and .b for every PRED_BHS mode.
;; NOTE(review): presumably PUNPK{HI,LO} only has the .h <- .b form;
;; confirm against the Arm SVE instruction reference.
5158 | (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>" | |
5159 | [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa") | |
5160 | (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")] | |
5161 | UNPACK_UNSIGNED))] | |
5162 | "TARGET_SVE" | |
5163 | "punpk<perm_hilo>\t%0.h, %1.b" | |
5164 | ) |