]> gcc.gnu.org Git - gcc.git/blame - gcc/config/aarch64/aarch64-sve.md
[AArch64] Rework SVE INC/DEC handling
[gcc.git] / gcc / config / aarch64 / aarch64-sve.md
CommitLineData
43cacb12 1;; Machine description for AArch64 SVE.
a5544970 2;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
43cacb12
RS
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
20
915d28fe
RS
21;; The file is organised into the following sections (search for the full
22;; line):
23;;
24;; == General notes
25;; ---- Note on the handling of big-endian SVE
34467289 26;; ---- Description of UNSPEC_PTEST
00fa90d9 27;; ---- Description of UNSPEC_PRED_Z
06308276 28;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
c9c5a809 29;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
915d28fe
RS
30;;
31;; == Moves
32;; ---- Moves of single vectors
33;; ---- Moves of multiple vectors
34;; ---- Moves of predicates
35;;
36;; == Loads
37;; ---- Normal contiguous loads
38;; ---- Normal gather loads
39;;
40;; == Stores
41;; ---- Normal contiguous stores
42;; ---- Normal scatter stores
43;;
44;; == Vector creation
45;; ---- [INT,FP] Duplicate element
46;; ---- [INT,FP] Initialize from individual elements
47;; ---- [INT] Linear series
48;; ---- [PRED] Duplicate element
49;;
50;; == Vector decomposition
51;; ---- [INT,FP] Extract index
52;; ---- [INT,FP] Extract active element
53;; ---- [PRED] Extract index
54;;
55;; == Unary arithmetic
56;; ---- [INT] General unary arithmetic corresponding to rtx codes
d7a09c44 57;; ---- [INT] General unary arithmetic corresponding to unspecs
d113ece6 58;; ---- [INT] Zero extension
e0a0be93 59;; ---- [INT] Logical inverse
d45b20a5 60;; ---- [FP] General unary arithmetic corresponding to unspecs
915d28fe
RS
61;; ---- [PRED] Inverse
62
63;; == Binary arithmetic
64;; ---- [INT] General binary arithmetic corresponding to rtx codes
65;; ---- [INT] Addition
66;; ---- [INT] Subtraction
a229966c 67;; ---- [INT] Take address
915d28fe 68;; ---- [INT] Absolute difference
915d28fe
RS
69;; ---- [INT] Highpart multiplication
70;; ---- [INT] Division
71;; ---- [INT] Binary logical operations
72;; ---- [INT] Binary logical operations (inverted second input)
73;; ---- [INT] Shifts
915d28fe
RS
74;; ---- [FP] General binary arithmetic corresponding to rtx codes
75;; ---- [FP] General binary arithmetic corresponding to unspecs
76;; ---- [FP] Addition
77;; ---- [FP] Subtraction
78;; ---- [FP] Absolute difference
79;; ---- [FP] Multiplication
915d28fe
RS
80;; ---- [FP] Binary logical operations
81;; ---- [FP] Sign copying
82;; ---- [FP] Maximum and minimum
83;; ---- [PRED] Binary logical operations
84;; ---- [PRED] Binary logical operations (inverted second input)
85;; ---- [PRED] Binary logical operations (inverted result)
86;;
87;; == Ternary arithmetic
88;; ---- [INT] MLA and MAD
89;; ---- [INT] MLS and MSB
90;; ---- [INT] Dot product
91;; ---- [INT] Sum of absolute differences
92;; ---- [FP] General ternary arithmetic corresponding to unspecs
915d28fe
RS
93;;
94;; == Comparisons and selects
95;; ---- [INT,FP] Select based on predicates
96;; ---- [INT,FP] Compare and select
97;; ---- [INT] Comparisons
98;; ---- [INT] While tests
42b4e87d
RS
99;; ---- [FP] Direct comparisons
100;; ---- [FP] Absolute comparisons
915d28fe
RS
101;; ---- [PRED] Test bits
102;;
103;; == Reductions
104;; ---- [INT,FP] Conditional reductions
105;; ---- [INT] Tree reductions
106;; ---- [FP] Tree reductions
107;; ---- [FP] Left-to-right reductions
108;;
109;; == Permutes
110;; ---- [INT,FP] General permutes
111;; ---- [INT,FP] Special-purpose unary permutes
112;; ---- [INT,FP] Special-purpose binary permutes
113;; ---- [PRED] Special-purpose binary permutes
114;;
115;; == Conversions
116;; ---- [INT<-INT] Packs
117;; ---- [INT<-INT] Unpacks
118;; ---- [INT<-FP] Conversions
119;; ---- [INT<-FP] Packs
120;; ---- [INT<-FP] Unpacks
121;; ---- [FP<-INT] Conversions
122;; ---- [FP<-INT] Packs
123;; ---- [FP<-INT] Unpacks
124;; ---- [FP<-FP] Packs
125;; ---- [FP<-FP] Unpacks
126;; ---- [PRED<-PRED] Packs
127;; ---- [PRED<-PRED] Unpacks
128
129;; =========================================================================
130;; == General notes
131;; =========================================================================
132;;
133;; -------------------------------------------------------------------------
134;; ---- Note on the handling of big-endian SVE
135;; -------------------------------------------------------------------------
43cacb12
RS
136;;
137;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
138;; same way as movdi or movti would: the first byte of memory goes
139;; into the most significant byte of the register and the last byte
140;; of memory goes into the least significant byte of the register.
141;; This is the most natural ordering for Advanced SIMD and matches
142;; the ABI layout for 64-bit and 128-bit vector types.
143;;
144;; As a result, the order of bytes within the register is what GCC
145;; expects for a big-endian target, and subreg offsets therefore work
146;; as expected, with the first element in memory having subreg offset 0
147;; and the last element in memory having the subreg offset associated
148;; with a big-endian lowpart. However, this ordering also means that
149;; GCC's lane numbering does not match the architecture's numbering:
150;; GCC always treats the element at the lowest address in memory
151;; (subreg offset 0) as element 0, while the architecture treats
152;; the least significant end of the register as element 0.
153;;
154;; The situation for SVE is different. We want the layout of the
155;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
156;; logically, a mov<mode> load must be indistinguishable from a
157;; maskload<mode> whose mask is all true. We therefore need the
158;; register layout to match LD1 rather than LDR. The ABI layout of
159;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
160;;
161;; As a result, the architecture lane numbering matches GCC's lane
162;; numbering, with element 0 always being the first in memory.
163;; However:
164;;
165;; - Applying a subreg offset to a register does not give the element
166;; that GCC expects: the first element in memory has the subreg offset
167;; associated with a big-endian lowpart while the last element in memory
168;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
169;;
170;; - We cannot use LDR and STR for spill slots that might be accessed
171;; via subregs, since although the elements have the order GCC expects,
172;; the order of the bytes within the elements is different. We instead
173;; access spill slots via LD1 and ST1, using secondary reloads to
174;; reserve a predicate register.
34467289
RS
175;;
176;; -------------------------------------------------------------------------
177;; ---- Description of UNSPEC_PTEST
178;; -------------------------------------------------------------------------
179;;
180;; SVE provides a PTEST instruction for testing the active lanes of a
181;; predicate and setting the flags based on the result. The associated
182;; condition code tests are:
183;;
184;; - any (= ne): at least one active bit is set
185;; - none (= eq): all active bits are clear (*)
186;; - first (= mi): the first active bit is set
187;; - nfrst (= pl): the first active bit is clear (*)
188;; - last (= cc): the last active bit is set
189;; - nlast (= cs): the last active bit is clear (*)
190;;
191;; where the conditions marked (*) are also true when there are no active
192;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
193;; of a PTEST use the condition code mode CC_NZC.
194;;
195;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
196;; This means that for other predicate modes, we need a governing predicate
197;; in which all bits are defined.
198;;
199;; For example, most predicated .H operations ignore the odd bits of the
200;; governing predicate, so that an active lane is represented by the
201;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
202;; any value. To test a .H predicate, we instead need "10" and "00"
203;; respectively, so that the condition only tests the even bits of the
204;; predicate.
205;;
206;; Several instructions set the flags as a side-effect, in the same way
207;; that a separate PTEST would. It's important for code quality that we
208;; use these flags results as often as possible, particularly in the case
209;; of WHILE* and RDFFR.
210;;
211;; Also, some of the instructions that set the flags are unpredicated
212;; and instead implicitly test all .B, .H, .S or .D elements, as though
213;; they were predicated on a PTRUE of that size. For example, a .S
214;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
215;; would.
216;;
217;; We therefore need to represent PTEST operations in a way that
218;; makes it easy to combine them with both predicated and unpredicated
219;; operations, while using a VNx16BI governing predicate for all
220;; predicate modes. We do this using:
221;;
222;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
223;;
224;; where:
225;;
226;; - GP is the real VNx16BI governing predicate
227;;
228;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
229;; GP to CAST_GP are guaranteed to be clear in GP.
230;;
231;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
232;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
233;; SVE_MAYBE_NOT_PTRUE otherwise.
234;;
235;; - OP is the predicate we want to test, of the same mode as CAST_GP.
c9c5a809
RS
236;;
237;; -------------------------------------------------------------------------
00fa90d9
RS
238;; ---- Description of UNSPEC_PRED_Z
239;; -------------------------------------------------------------------------
240;;
241;; SVE integer comparisons are predicated and return zero for inactive
242;; lanes. Sometimes we use them with predicates that are all-true and
243;; sometimes we use them with general predicates.
244;;
245;; The integer comparisons also set the flags and so build in the effect
246;; of a PTEST. We therefore want to be able to combine integer comparison
247;; patterns with PTESTs of the result. One difficulty with doing this is
248;; that (as noted above) the PTEST is always a .B operation and so can place
249;; stronger requirements on the governing predicate than the comparison does.
250;;
251;; For example, when applying a separate PTEST to the result of a full-vector
252;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
253;; .B PTRUE. In contrast, the comparison might be predicated on either
254;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
255;; bits don't matter for .H operations.
256;;
257;; We therefore can't rely on a full-vector comparison using the same
258;; predicate register as a following PTEST. We instead need to remember
259;; whether a comparison is known to be a full-vector comparison and use
260;; this information in addition to a check for equal predicate registers.
261;; At the same time, it's useful to have a common representation for all
262;; integer comparisons, so that they can be handled by a single set of
263;; patterns.
264;;
265;; We therefore take a similar approach to UNSPEC_PTEST above and use:
266;;
267;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
268;;
269;; where:
270;;
271;; - GP is the governing predicate, of mode <M:VPRED>
272;;
273;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
274;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
275;; otherwise
276;;
277;; - CODE is the comparison code
278;;
279;; - OP0 and OP1 are the values being compared, of mode M
280;;
281;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
282;;
283;; -------------------------------------------------------------------------
06308276
RS
284;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
285;; -------------------------------------------------------------------------
286;;
287;; Many SVE integer operations are predicated. We can generate them
288;; from four sources:
289;;
290;; (1) Using normal unpredicated optabs. In this case we need to create
291;; an all-true predicate register to act as the governing predicate
292;; for the SVE instruction. There are no inactive lanes, and thus
293;; the values of inactive lanes don't matter.
294;;
295;; (2) Using _x ACLE functions. In this case the function provides a
296;; specific predicate and some lanes might be inactive. However,
297;; as for (1), the values of the inactive lanes don't matter.
298;; We can make extra lanes active without changing the behavior
299;; (although for code-quality reasons we should avoid doing so
300;; needlessly).
301;;
302;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
303;; These optabs have a predicate operand that specifies which lanes are
304;; active and another operand that provides the values of inactive lanes.
305;;
306;; (4) Using _m and _z ACLE functions. These functions map to the same
307;; patterns as (3), with the _z functions setting inactive lanes to zero
308;; and the _m functions setting the inactive lanes to one of the function
309;; arguments.
310;;
311;; For (1) and (2) we need a way of attaching the predicate to a normal
312;; unpredicated integer operation. We do this using:
313;;
314;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
315;;
316;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
317;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
318;; it always is for (1), but might not be for (2).
319;;
320;; The unspec as a whole has the same value as (code:M ...) when PRED is
321;; all-true. It is always semantically valid to replace PRED with a PTRUE,
322;; but as noted above, we should only do so if there's a specific benefit.
323;;
324;; (The "_X" in the unspec is named after the ACLE functions in (2).)
325;;
326;; For (3) and (4) we can simply use the SVE port's normal representation
327;; of a predicate-based select:
328;;
329;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
330;;
331;; where INACTIVE specifies the values of inactive lanes.
332;;
333;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
334;; than inserting the integer operation directly. This is mostly useful
335;; if we want the combine pass to merge an integer operation with an explicit
336;; vcond_mask (in other words, with a following SEL instruction). However,
337;; it's generally better to merge such operations at the gimple level
338;; using (3).
339;;
340;; -------------------------------------------------------------------------
c9c5a809
RS
341;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
342;; -------------------------------------------------------------------------
343;;
344;; Most SVE floating-point operations are predicated. We can generate
345;; them from four sources:
346;;
347;; (1) Using normal unpredicated optabs. In this case we need to create
348;; an all-true predicate register to act as the governing predicate
349;; for the SVE instruction. There are no inactive lanes, and thus
350;; the values of inactive lanes don't matter.
351;;
352;; (2) Using _x ACLE functions. In this case the function provides a
353;; specific predicate and some lanes might be inactive. However,
354;; as for (1), the values of the inactive lanes don't matter.
355;;
356;; The instruction must have the same exception behavior as the
357;; function call unless things like command-line flags specifically
358;; allow otherwise. For example, with -ffast-math, it is OK to
359;; raise exceptions for inactive lanes, but normally it isn't.
360;;
361;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
362;; These optabs have a predicate operand that specifies which lanes are
363;; active and another operand that provides the values of inactive lanes.
364;;
365;; (4) Using _m and _z ACLE functions. These functions map to the same
366;; patterns as (3), with the _z functions setting inactive lanes to zero
367;; and the _m functions setting the inactive lanes to one of the function
368;; arguments.
369;;
370;; So:
371;;
372;; - In (1), the predicate is known to be all true and the pattern can use
373;; unpredicated operations where available.
374;;
375;; - In (2), the predicate might or might not be all true. The pattern can
376;; use unpredicated instructions if the predicate is all-true or if things
377;; like command-line flags allow exceptions for inactive lanes.
378;;
379;; - (3) and (4) represent a native SVE predicated operation. Some lanes
380;; might be inactive and inactive lanes of the result must have specific
381;; values. There is no scope for using unpredicated instructions (and no
382;; reason to want to), so the question about command-line flags doesn't
383;; arise.
384;;
385;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
386;; in combination with a separate predicate operand, e.g.
387;;
388;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
389;; (sqrt:SVE_F 2 "register_operand" "w")]
390;; ....)
391;;
392;; because (sqrt ...) can raise an exception for any lane, including
393;; inactive ones. We therefore need to use an unspec instead.
394;;
395;; Also, (2) requires some way of distinguishing the case in which the
396;; predicate might have inactive lanes and cannot be changed from the
397;; case in which the predicate has no inactive lanes or can be changed.
398;; This information is also useful when matching combined FP patterns
399;; in which the predicates might not be equal.
400;;
401;; We therefore model FP operations as an unspec of the form:
402;;
403;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
404;;
405;; where:
406;;
407;; - PRED is the governing predicate.
408;;
409;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
410;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
411;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
412;;
413;; - OP0 OP1 ... are the normal input operands to the operation.
414;;
415;; - MNEMONIC is the mnemonic of the associated SVE instruction.
43cacb12 416
915d28fe
RS
417;; =========================================================================
418;; == Moves
419;; =========================================================================
420
421;; -------------------------------------------------------------------------
422;; ---- Moves of single vectors
423;; -------------------------------------------------------------------------
424;; Includes:
425;; - MOV (including aliases)
426;; - LD1B (contiguous form)
427;; - LD1D ( " " )
428;; - LD1H ( " " )
429;; - LD1W ( " " )
430;; - LDR
431;; - ST1B (contiguous form)
432;; - ST1D ( " " )
433;; - ST1H ( " " )
434;; - ST1W ( " " )
435;; - STR
436;; -------------------------------------------------------------------------
437
43cacb12
RS
;; Expand a move between SVE data vectors (SVE_ALL modes).  The C body
;; tries three special cases in order and emits the plain (set ...)
;; template only if none of them applies:
;;   - a memory operand while pseudos can still be created is handed to
;;     aarch64_expand_sve_mem_move together with the <VPRED> mode;
;;   - a constant source is handed to aarch64_expand_mov_immediate;
;;   - on big-endian targets, aarch64_maybe_expand_sve_subreg_move gets a
;;     chance to handle the move itself.
438(define_expand "mov<mode>"
439 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
440 (match_operand:SVE_ALL 1 "general_operand"))]
441 "TARGET_SVE"
442 {
443 /* Use the predicated load and store patterns where possible.
444 This is required for big-endian targets (see the comment at the
445 head of the file) and increases the addressing choices for
446 little-endian. */
447 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
ea403d8b 448 && can_create_pseudo_p ())
43cacb12
RS
449 {
450 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
451 DONE;
452 }
453
454 if (CONSTANT_P (operands[1]))
455 {
4aeb1ba7 456 aarch64_expand_mov_immediate (operands[0], operands[1]);
43cacb12
RS
457 DONE;
458 }
002092be
RS
459
460 /* Optimize subregs on big-endian targets: we can use REV[BHW]
461 instead of going through memory. */
462 if (BYTES_BIG_ENDIAN
ea403d8b 463 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
002092be
RS
464 DONE;
465 }
466)
467
915d28fe
RS
;; Misaligned moves of SVE data vectors.  The expander simply forwards
;; both operands to emit_move_insn.
468(define_expand "movmisalign<mode>"
469 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
470 (match_operand:SVE_ALL 1 "general_operand"))]
471 "TARGET_SVE"
002092be 472 {
915d28fe
RS
473 /* Equivalent to a normal move for our purposes. */
474 emit_move_insn (operands[0], operands[1]);
002092be 475 DONE;
43cacb12
RS
476 }
477)
478
479;; Unpredicated moves (little-endian). Only allow memory operations
480;; during and after RA; before RA we want the predicated load and
481;; store patterns to be used instead.
;;
;; The four alternatives are, in order: LDR from a Utr memory operand,
;; STR to a Utr memory operand, a register-to-register MOV, and a
;; Dn-constrained constant whose assembly text is produced by
;; aarch64_output_sve_mov_immediate.
482(define_insn "*aarch64_sve_mov<mode>_le"
483 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
484 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
485 "TARGET_SVE
486 && !BYTES_BIG_ENDIAN
487 && ((lra_in_progress || reload_completed)
488 || (register_operand (operands[0], <MODE>mode)
489 && nonmemory_operand (operands[1], <MODE>mode)))"
490 "@
491 ldr\t%0, %1
492 str\t%1, %0
493 mov\t%0.d, %1.d
494 * return aarch64_output_sve_mov_immediate (operands[1]);"
495)
496
497;; Unpredicated moves (big-endian). Memory accesses require secondary
498;; reloads.
;;
;; Only a register destination and a non-memory source are accepted.
;; The two alternatives are a register-to-register MOV and a
;; Dn-constrained constant output by aarch64_output_sve_mov_immediate.
499(define_insn "*aarch64_sve_mov<mode>_be"
500 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
501 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
502 "TARGET_SVE && BYTES_BIG_ENDIAN"
503 "@
504 mov\t%0.d, %1.d
505 * return aarch64_output_sve_mov_immediate (operands[1]);"
506)
507
508;; Handle big-endian memory reloads. We use byte PTRUE for all modes
509;; to try to encourage reuse.
1bbffb87 510;; This pattern needs constraints due to TARGET_SECONDARY_RELOAD hook.
43cacb12
RS
;; Operand 2 is a clobbered VNx16BI register.  The expander loads it with
;; CONSTM1_RTX (VNx16BImode), views it in the predicate mode chosen from
;; the element size of operand 0, and passes that predicate to
;; aarch64_emit_sve_pred_move to move operand 1 into operand 0.
511(define_expand "aarch64_sve_reload_be"
512 [(parallel
513 [(set (match_operand 0)
ea403d8b 514 (match_operand 1))
43cacb12
RS
515 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
516 "TARGET_SVE && BYTES_BIG_ENDIAN"
517 {
518 /* Create a PTRUE. */
519 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
520
521 /* Refer to the PTRUE in the appropriate mode for this move. */
522 machine_mode mode = GET_MODE (operands[0]);
523 machine_mode pred_mode
524 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
525 rtx pred = gen_lowpart (pred_mode, operands[2]);
526
527 /* Emit a predicated load or store. */
528 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
529 DONE;
530 }
531)
532
915d28fe
RS
533;; A predicated move in which the predicate is known to be all-true.
534;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
535;; so changes to this pattern will need changes there as well.
;;
;; Operand 1 is the governing predicate and operand 2 the value being
;; moved.  The insn condition requires at least one of operands 0 and 2
;; to be a register.  The alternatives are, in order:
;;   - register to register, output as "#" and split into a plain
;;     (set (match_dup 0) (match_dup 2));
;;   - LD1 from memory;
;;   - ST1 to memory.
0c63a8ee 536(define_insn_and_split "@aarch64_pred_mov<mode>"
9c6b4601 537 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
43cacb12 538 (unspec:SVE_ALL
9c6b4601
RS
539 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
540 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
06308276 541 UNSPEC_PRED_X))]
43cacb12
RS
542 "TARGET_SVE
543 && (register_operand (operands[0], <MODE>mode)
544 || register_operand (operands[2], <MODE>mode))"
545 "@
9c6b4601 546 #
43cacb12
RS
547 ld1<Vesize>\t%0.<Vetype>, %1/z, %2
548 st1<Vesize>\t%2.<Vetype>, %1, %0"
9c6b4601
RS
549 "&& register_operand (operands[0], <MODE>mode)
550 && register_operand (operands[2], <MODE>mode)"
551 [(set (match_dup 0) (match_dup 2))]
43cacb12
RS
552)
553
915d28fe
RS
554;; A pattern for optimizing SUBREGs that have a reinterpreting effect
555;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
556;; for details. We use a special predicate for operand 2 to reduce
557;; the number of patterns.
;;
;; Operand 1 is a VNx16BI predicate register.  The insn is always output
;; as "#"; once reload has completed it is split by calling
;; aarch64_split_sve_subreg_move on the three operands.
558(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
559 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
43cacb12 560 (unspec:SVE_ALL
915d28fe
RS
561 [(match_operand:VNx16BI 1 "register_operand" "Upl")
562 (match_operand 2 "aarch64_any_register_operand" "w")]
563 UNSPEC_REV_SUBREG))]
564 "TARGET_SVE && BYTES_BIG_ENDIAN"
565 "#"
566 "&& reload_completed"
567 [(const_int 0)]
f307441a 568 {
915d28fe
RS
569 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
570 DONE;
f307441a
RS
571 }
572)
573
4aeb1ba7
RS
574;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
575;; This is equivalent to a subreg on little-endian targets but not for
576;; big-endian; see the comment at the head of the file for details.
;;
;; On little-endian targets the expander emits a move from
;; gen_lowpart (<MODE>mode, operands[1]) and stops there.  On big-endian
;; targets the C body does nothing, so the UNSPEC_REINTERPRET set is
;; emitted as written.
577(define_expand "@aarch64_sve_reinterpret<mode>"
578 [(set (match_operand:SVE_ALL 0 "register_operand")
579 (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand")]
580 UNSPEC_REINTERPRET))]
581 "TARGET_SVE"
582 {
583 if (!BYTES_BIG_ENDIAN)
584 {
585 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
586 DONE;
587 }
588 }
589)
590
591;; A pattern for handling type punning on big-endian targets. We use a
592;; special predicate for operand 1 to reduce the number of patterns.
;;
;; Operand 1 is tied to operand 0 through the "0" constraint.  The insn is
;; output as "#"; after reload the split emits only a NOTE_INSN_DELETED
;; note, so the split itself produces no instruction.
593(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
594 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
595 (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand" "0")]
596 UNSPEC_REINTERPRET))]
597 "TARGET_SVE"
598 "#"
599 "&& reload_completed"
600 [(set (match_dup 0) (match_dup 1))]
601 {
602 emit_note (NOTE_INSN_DELETED);
603 DONE;
604 }
605)
606
915d28fe
RS
607;; -------------------------------------------------------------------------
608;; ---- Moves of multiple vectors
609;; -------------------------------------------------------------------------
610;; All patterns in this section are synthetic and split to real
611;; instructions after reload.
612;; -------------------------------------------------------------------------
f307441a 613
9f4cbab8
RS
;; Expand a move between SVE structure modes (SVE_STRUCT).  On big-endian
;; targets a move with a memory operand is handed to
;; aarch64_expand_sve_mem_move, after asserting that pseudos can still be
;; created.  A constant source is handed to aarch64_expand_mov_immediate.
;; Otherwise the plain (set ...) template is emitted.
614(define_expand "mov<mode>"
615 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
616 (match_operand:SVE_STRUCT 1 "general_operand"))]
617 "TARGET_SVE"
618 {
619 /* Big-endian loads and stores need to be done via LD1 and ST1;
620 see the comment at the head of the file for details. */
621 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
622 && BYTES_BIG_ENDIAN)
623 {
624 gcc_assert (can_create_pseudo_p ());
625 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
626 DONE;
627 }
628
629 if (CONSTANT_P (operands[1]))
630 {
631 aarch64_expand_mov_immediate (operands[0], operands[1]);
632 DONE;
633 }
634 }
635)
636
637;; Unpredicated structure moves (little-endian).
;; The insn has no assembly of its own: it is always output as "#" and so
;; relies on being split.  Its "length" attribute is taken from
;; <insn_length>.
638(define_insn "*aarch64_sve_mov<mode>_le"
639 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
640 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
641 "TARGET_SVE && !BYTES_BIG_ENDIAN"
642 "#"
643 [(set_attr "length" "<insn_length>")]
644)
645
646;; Unpredicated structure moves (big-endian). Memory accesses require
647;; secondary reloads.
;; Only a register destination and a non-memory source are accepted.  The
;; insn is always output as "#" and so relies on being split; its "length"
;; attribute is taken from <insn_length>.
915d28fe 648(define_insn "*aarch64_sve_mov<mode>_be"
9f4cbab8
RS
649 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
650 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
651 "TARGET_SVE && BYTES_BIG_ENDIAN"
652 "#"
653 [(set_attr "length" "<insn_length>")]
654)
655
656;; Split unpredicated structure moves into pieces. This is the same
657;; for both big-endian and little-endian code, although it only needs
658;; to handle memory operands for little-endian code.
;;
;; The split runs only after reload.  A register-to-register move is
;; delegated to aarch64_simd_emit_reg_reg_move.  Every other case emits
;; <vector_count> separate <VSINGLE>mode sets, the i-th one addressing both
;; operands at byte offset i * BYTES_PER_SVE_VECTOR.
659(define_split
660 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
661 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
662 "TARGET_SVE && reload_completed"
663 [(const_int 0)]
664 {
665 rtx dest = operands[0];
666 rtx src = operands[1];
667 if (REG_P (dest) && REG_P (src))
668 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
669 else
670 for (unsigned int i = 0; i < <vector_count>; ++i)
671 {
672 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
673 i * BYTES_PER_SVE_VECTOR);
674 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
675 i * BYTES_PER_SVE_VECTOR);
676 emit_insn (gen_rtx_SET (subdest, subsrc));
677 }
678 DONE;
679 }
680)
681
682;; Predicated structure moves. This works for both endiannesses but in
683;; practice is only useful for big-endian.
;;
;; Operand 1 is the governing predicate and operand 2 the value being
;; moved.  The insn condition requires at least one of operands 0 and 2
;; to be a register.  The insn is always output as "#".  After reload it is
;; split into <vector_count> calls to aarch64_emit_sve_pred_move, one per
;; <VSINGLE>mode piece at byte offset i * BYTES_PER_SVE_VECTOR, all using
;; operand 1 as the predicate.
0c63a8ee 684(define_insn_and_split "@aarch64_pred_mov<mode>"
9c6b4601 685 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
9f4cbab8 686 (unspec:SVE_STRUCT
9c6b4601
RS
687 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
688 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
06308276 689 UNSPEC_PRED_X))]
9f4cbab8
RS
690 "TARGET_SVE
691 && (register_operand (operands[0], <MODE>mode)
692 || register_operand (operands[2], <MODE>mode))"
693 "#"
694 "&& reload_completed"
695 [(const_int 0)]
696 {
697 for (unsigned int i = 0; i < <vector_count>; ++i)
698 {
699 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
700 <MODE>mode,
701 i * BYTES_PER_SVE_VECTOR);
702 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
703 <MODE>mode,
704 i * BYTES_PER_SVE_VECTOR);
705 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
706 }
707 DONE;
708 }
709 [(set_attr "length" "<insn_length>")]
710)
711
915d28fe
RS
712;; -------------------------------------------------------------------------
713;; ---- Moves of predicates
714;; -------------------------------------------------------------------------
715;; Includes:
716;; - MOV
717;; - LDR
718;; - PFALSE
719;; - PTRUE
720;; - STR
721;; -------------------------------------------------------------------------
722
43cacb12
RS
;; Expand a move between SVE predicate modes (PRED_ALL).  When the
;; destination is memory, the source is first forced into a register.
;; A source that is still a constant after that step is handed to
;; aarch64_expand_mov_immediate.  Otherwise the plain (set ...) template
;; is emitted.
723(define_expand "mov<mode>"
724 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
725 (match_operand:PRED_ALL 1 "general_operand"))]
726 "TARGET_SVE"
727 {
728 if (GET_CODE (operands[0]) == MEM)
729 operands[1] = force_reg (<MODE>mode, operands[1]);
0b1fe8cf
RS
730
731 if (CONSTANT_P (operands[1]))
732 {
733 aarch64_expand_mov_immediate (operands[0], operands[1]);
734 DONE;
735 }
43cacb12
RS
736 }
737)
738
;; Moves of SVE predicates.  The insn condition requires at least one of
;; the two operands to be a register.  The alternatives are, in order:
;; a predicate-register MOV, STR to memory, LDR from memory, and a
;; Dn-constrained constant output by aarch64_output_sve_mov_immediate.
739(define_insn "*aarch64_sve_mov<mode>"
1044fa32 740 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
0b1fe8cf 741 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
43cacb12
RS
742 "TARGET_SVE
743 && (register_operand (operands[0], <MODE>mode)
744 || register_operand (operands[1], <MODE>mode))"
745 "@
746 mov\t%0.b, %1.b
747 str\t%1, %0
748 ldr\t%0, %1
1044fa32 749 * return aarch64_output_sve_mov_immediate (operands[1]);"
43cacb12
RS
750)
751
915d28fe
RS
752;; =========================================================================
753;; == Loads
754;; =========================================================================
755
756;; -------------------------------------------------------------------------
757;; ---- Normal contiguous loads
758;; -------------------------------------------------------------------------
759;; Includes contiguous forms of:
760;; - LD1B
761;; - LD1D
762;; - LD1H
763;; - LD1W
764;; - LD2B
765;; - LD2D
766;; - LD2H
767;; - LD2W
768;; - LD3B
769;; - LD3D
770;; - LD3H
771;; - LD3W
772;; - LD4B
773;; - LD4D
774;; - LD4H
775;; - LD4W
776;; -------------------------------------------------------------------------
777
778;; Predicated LD1.
779(define_insn "maskload<mode><vpred>"
780 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
781 (unspec:SVE_ALL
782 [(match_operand:<VPRED> 2 "register_operand" "Upl")
783 (match_operand:SVE_ALL 1 "memory_operand" "m")]
784 UNSPEC_LD1_SVE))]
43cacb12 785 "TARGET_SVE"
915d28fe 786 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
43cacb12
RS
787)
788
915d28fe
RS
789;; Unpredicated LD[234].
790(define_expand "vec_load_lanes<mode><vsingle>"
791 [(set (match_operand:SVE_STRUCT 0 "register_operand")
792 (unspec:SVE_STRUCT
793 [(match_dup 2)
794 (match_operand:SVE_STRUCT 1 "memory_operand")]
795 UNSPEC_LDN))]
43cacb12
RS
796 "TARGET_SVE"
797 {
/* Operand 2 is a match_dup rather than a caller-supplied operand; fill
   it with the <VPRED>mode register returned by aarch64_ptrue_reg.  The
   emitted rtl then has the same shape as the predicated
   vec_mask_load_lanes<mode><vsingle> pattern.  */
915d28fe 798 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
799 }
800)
801
915d28fe
RS
802;; Predicated LD[234].
803(define_insn "vec_mask_load_lanes<mode><vsingle>"
804 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
805 (unspec:SVE_STRUCT
806 [(match_operand:<VPRED> 2 "register_operand" "Upl")
807 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
808 UNSPEC_LDN))]
8711e791 809 "TARGET_SVE"
915d28fe 810 "ld<vector_count><Vesize>\t%0, %2/z, %1"
8711e791
RS
811)
812
915d28fe
RS
813;; -------------------------------------------------------------------------
814;; ---- Normal gather loads
815;; -------------------------------------------------------------------------
816;; Includes gather forms of:
817;; - LD1D
818;; - LD1W
819;; -------------------------------------------------------------------------
820
821;; Unpredicated gather loads.
822(define_expand "gather_load<mode>"
823 [(set (match_operand:SVE_SD 0 "register_operand")
824 (unspec:SVE_SD
825 [(match_dup 5)
826 (match_operand:DI 1 "aarch64_reg_or_zero")
827 (match_operand:<V_INT_EQUIV> 2 "register_operand")
828 (match_operand:DI 3 "const_int_operand")
829 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
830 (mem:BLK (scratch))]
831 UNSPEC_LD1_GATHER))]
832 "TARGET_SVE"
43cacb12 833 {
/* Operand 5 is a match_dup rather than a caller-supplied operand; fill
   it with the <VPRED>mode register returned by aarch64_ptrue_reg.  The
   emitted rtl then has the same shape as the predicated
   mask_gather_load<mode> patterns.  */
915d28fe 834 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12 835 }
43cacb12
RS
836)
837
915d28fe
RS
838;; Predicated gather loads for 32-bit elements. Operand 3 is true for
839;; unsigned extension and false for signed extension.
840(define_insn "mask_gather_load<mode>"
841 [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
842 (unspec:SVE_S
843 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
844 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
845 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
846 (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
847 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
848 (mem:BLK (scratch))]
849 UNSPEC_LD1_GATHER))]
850 "TARGET_SVE"
851 "@
852 ld1w\t%0.s, %5/z, [%2.s]
853 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
854 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
855 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
856 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
857)
858
859;; Predicated gather loads for 64-bit elements. The value of operand 3
860;; doesn't matter in this case.
861(define_insn "mask_gather_load<mode>"
862 [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
863 (unspec:SVE_D
864 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
865 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
866 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
867 (match_operand:DI 3 "const_int_operand")
868 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
869 (mem:BLK (scratch))]
870 UNSPEC_LD1_GATHER))]
871 "TARGET_SVE"
872 "@
873 ld1d\t%0.d, %5/z, [%2.d]
874 ld1d\t%0.d, %5/z, [%1, %2.d]
875 ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
876)
877
878;; =========================================================================
879;; == Stores
880;; =========================================================================
881
882;; -------------------------------------------------------------------------
883;; ---- Normal contiguous stores
884;; -------------------------------------------------------------------------
885;; Includes contiguous forms of:
886;; - ST1B
887;; - ST1D
888;; - ST1H
889;; - ST1W
890;; - ST2B
891;; - ST2D
892;; - ST2H
893;; - ST2W
894;; - ST3B
895;; - ST3D
896;; - ST3H
897;; - ST3W
898;; - ST4B
899;; - ST4D
900;; - ST4H
901;; - ST4W
902;; -------------------------------------------------------------------------
903
904;; Predicated ST1.
905(define_insn "maskstore<mode><vpred>"
906 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
907 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
908 (match_operand:SVE_ALL 1 "register_operand" "w")
909 (match_dup 0)]
910 UNSPEC_ST1_SVE))]
911 "TARGET_SVE"
912 "st1<Vesize>\t%1.<Vetype>, %2, %0"
913)
914
915;; Unpredicated ST[234]. This is always a full update, so the dependence
916;; on the old value of the memory location (via (match_dup 0)) is redundant.
917;; There doesn't seem to be any obvious benefit to treating the all-true
918;; case differently though. In particular, it's very unlikely that we'll
919;; only find out during RTL that a store_lanes is dead.
920(define_expand "vec_store_lanes<mode><vsingle>"
921 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
922 (unspec:SVE_STRUCT
923 [(match_dup 2)
924 (match_operand:SVE_STRUCT 1 "register_operand")
925 (match_dup 0)]
926 UNSPEC_STN))]
927 "TARGET_SVE"
43cacb12 928 {
/* Operand 2 is a match_dup rather than a caller-supplied operand; fill
   it with the <VPRED>mode register returned by aarch64_ptrue_reg.  The
   emitted rtl then has the same shape as the predicated
   vec_mask_store_lanes<mode><vsingle> pattern.  */
915d28fe 929 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
930 }
931)
932
915d28fe
RS
933;; Predicated ST[234].
934(define_insn "vec_mask_store_lanes<mode><vsingle>"
935 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
936 (unspec:SVE_STRUCT
937 [(match_operand:<VPRED> 2 "register_operand" "Upl")
938 (match_operand:SVE_STRUCT 1 "register_operand" "w")
939 (match_dup 0)]
940 UNSPEC_STN))]
941 "TARGET_SVE"
942 "st<vector_count><Vesize>\t%1, %2, %0"
943)
944
945;; -------------------------------------------------------------------------
946;; ---- Normal scatter stores
947;; -------------------------------------------------------------------------
948;; Includes scatter forms of:
949;; - ST1D
950;; - ST1W
951;; -------------------------------------------------------------------------
952
953;; Unpredicated scatter stores.
954(define_expand "scatter_store<mode>"
955 [(set (mem:BLK (scratch))
956 (unspec:BLK
957 [(match_dup 5)
958 (match_operand:DI 0 "aarch64_reg_or_zero")
959 (match_operand:<V_INT_EQUIV> 1 "register_operand")
960 (match_operand:DI 2 "const_int_operand")
961 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
962 (match_operand:SVE_SD 4 "register_operand")]
963 UNSPEC_ST1_SCATTER))]
964 "TARGET_SVE"
43cacb12 965 {
/* Operand 5 is a match_dup rather than a caller-supplied operand; fill
   it with the <VPRED>mode register returned by aarch64_ptrue_reg.  The
   emitted rtl then has the same shape as the predicated
   mask_scatter_store<mode> patterns.  */
915d28fe 966 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
967 }
968)
969
915d28fe
RS
970;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
971;; unsigned extension and false for signed extension.
972(define_insn "mask_scatter_store<mode>"
973 [(set (mem:BLK (scratch))
974 (unspec:BLK
975 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
976 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
977 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
978 (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
979 (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
980 (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
981 UNSPEC_ST1_SCATTER))]
43cacb12
RS
982 "TARGET_SVE"
983 "@
915d28fe
RS
984 st1w\t%4.s, %5, [%1.s]
985 st1w\t%4.s, %5, [%0, %1.s, sxtw]
986 st1w\t%4.s, %5, [%0, %1.s, uxtw]
987 st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
988 st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
989)
990
991;; Predicated scatter stores for 64-bit elements. The value of operand 2
992;; doesn't matter in this case.
993(define_insn "mask_scatter_store<mode>"
994 [(set (mem:BLK (scratch))
995 (unspec:BLK
996 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
997 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
998 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
999 (match_operand:DI 2 "const_int_operand")
1000 (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
1001 (match_operand:SVE_D 4 "register_operand" "w, w, w")]
1002 UNSPEC_ST1_SCATTER))]
1003 "TARGET_SVE"
1004 "@
1005 st1d\t%4.d, %5, [%1.d]
1006 st1d\t%4.d, %5, [%0, %1.d]
1007 st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
43cacb12
RS
1008)
1009
915d28fe
RS
1010;; =========================================================================
1011;; == Vector creation
1012;; =========================================================================
1013
1014;; -------------------------------------------------------------------------
1015;; ---- [INT,FP] Duplicate element
1016;; -------------------------------------------------------------------------
1017;; Includes:
1018;; - MOV
1019;; - LD1RB
1020;; - LD1RD
1021;; - LD1RH
1022;; - LD1RW
1023;; - LD1RQB
1024;; - LD1RQD
1025;; - LD1RQH
1026;; - LD1RQW
1027;; -------------------------------------------------------------------------
1028
43cacb12
RS
;; Duplicate scalar operand 1 into every element of SVE vector operand 0.
;; A memory source is expanded directly to sve_ld1r<mode>, using the
;; predicate returned by aarch64_ptrue_reg and a zero vector as the final
;; operand.  Any other source emits the vec_duplicate-plus-scratch-clobber
;; pattern as written.
1029(define_expand "vec_duplicate<mode>"
1030 [(parallel
1031 [(set (match_operand:SVE_ALL 0 "register_operand")
1032 (vec_duplicate:SVE_ALL
1033 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
678faefc 1034 (clobber (scratch:VNx16BI))])]
43cacb12
RS
1035 "TARGET_SVE"
1036 {
1037 if (MEM_P (operands[1]))
1038 {
16de3637 1039 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
1040 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
1041 CONST0_RTX (<MODE>mode)));
1042 DONE;
1043 }
1044 }
1045)
1046
1047;; Accept memory operands for the benefit of combine, and also in case
1048;; the scalar input gets spilled to memory during RA. We want to split
1049;; the load at the first opportunity in order to allow the PTRUE to be
1050;; optimized with surrounding code.
1051(define_insn_and_split "*vec_duplicate<mode>_reg"
1052 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
1053 (vec_duplicate:SVE_ALL
1054 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
678faefc 1055 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
43cacb12
RS
1056 "TARGET_SVE"
1057 "@
1058 mov\t%0.<Vetype>, %<vwcore>1
1059 mov\t%0.<Vetype>, %<Vetype>1
1060 #"
1061 "&& MEM_P (operands[1])"
1062 [(const_int 0)]
1063 {
1064 if (GET_CODE (operands[2]) == SCRATCH)
678faefc
RS
1065 operands[2] = gen_reg_rtx (VNx16BImode);
1066 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
1067 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
1068 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
43cacb12
RS
1069 CONST0_RTX (<MODE>mode)));
1070 DONE;
1071 }
1072 [(set_attr "length" "4,4,8")]
1073)
1074
4aeb1ba7
RS
1075;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
1076(define_insn "@aarch64_vec_duplicate_vq<mode>_le"
1077 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1078 (vec_duplicate:SVE_ALL
1079 (match_operand:<V128> 1 "register_operand" "w")))]
1080 "TARGET_SVE && !BYTES_BIG_ENDIAN"
1081 {
1082 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
1083 return "dup\t%0.q, %1.q[0]";
1084 }
1085)
1086
1087;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
1088;; The SVE register layout puts memory lane N into (architectural)
1089;; register lane N, whereas the Advanced SIMD layout puts the memory
1090;; lsb into the register lsb. We therefore have to describe this in rtl
1091;; terms as a reverse of the V128 vector followed by a duplicate.
1092(define_insn "@aarch64_vec_duplicate_vq<mode>_be"
1093 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1094 (vec_duplicate:SVE_ALL
1095 (vec_select:<V128>
1096 (match_operand:<V128> 1 "register_operand" "w")
1097 (match_operand 2 "descending_int_parallel"))))]
1098 "TARGET_SVE
1099 && BYTES_BIG_ENDIAN
1100 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
1101 GET_MODE_NUNITS (<V128>mode) - 1)"
1102 {
1103 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
1104 return "dup\t%0.q, %1.q[0]";
1105 }
1106)
1107
43cacb12
RS
1108;; This is used for vec_duplicate<mode>s from memory, but can also
1109;; be used by combine to optimize selects of a vec_duplicate<mode>
1110;; with zero.
1111(define_insn "sve_ld1r<mode>"
1112 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1113 (unspec:SVE_ALL
1114 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1115 (vec_duplicate:SVE_ALL
1116 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
1117 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
1118 UNSPEC_SEL))]
1119 "TARGET_SVE"
1120 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
1121)
1122
4aeb1ba7
RS
1123;; Load 128 bits from memory under predicate control and duplicate to
1124;; fill a vector.
1125(define_insn "@aarch64_sve_ld1rq<mode>"
947b1372
RS
1126 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1127 (unspec:SVE_ALL
4aeb1ba7
RS
1128 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1129 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
43cacb12
RS
1130 UNSPEC_LD1RQ))]
1131 "TARGET_SVE"
4aeb1ba7
RS
1132 {
1133 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
1134 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
1135 }
43cacb12
RS
1136)
1137
915d28fe
RS
1138;; -------------------------------------------------------------------------
1139;; ---- [INT,FP] Initialize from individual elements
1140;; -------------------------------------------------------------------------
1141;; Includes:
1142;; - INSR
1143;; -------------------------------------------------------------------------
1144
;; Initialize SVE vector operand 0 from the elements described by
;; operand 1.  The expansion is delegated entirely to
;; aarch64_sve_expand_vector_init; no rtl template is emitted here.
1145(define_expand "vec_init<mode><Vel>"
1146 [(match_operand:SVE_ALL 0 "register_operand")
1147 (match_operand 1 "")]
43cacb12
RS
1148 "TARGET_SVE"
1149 {
915d28fe 1150 aarch64_sve_expand_vector_init (operands[0], operands[1]);
43cacb12
RS
1151 DONE;
1152 }
1153)
1154
915d28fe
RS
1155;; Shift an SVE vector left and insert a scalar into element 0.
1156(define_insn "vec_shl_insert_<mode>"
61ee25b9 1157 [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??&w, ?&w")
915d28fe 1158 (unspec:SVE_ALL
61ee25b9
RS
1159 [(match_operand:SVE_ALL 1 "register_operand" "0, 0, w, w")
1160 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
915d28fe
RS
1161 UNSPEC_INSR))]
1162 "TARGET_SVE"
1163 "@
1164 insr\t%0.<Vetype>, %<vwcore>2
61ee25b9
RS
1165 insr\t%0.<Vetype>, %<Vetype>2
1166 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
1167 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
1168 [(set_attr "movprfx" "*,*,yes,yes")]
915d28fe
RS
1169)
1170
1171;; -------------------------------------------------------------------------
1172;; ---- [INT] Linear series
1173;; -------------------------------------------------------------------------
1174;; Includes:
1175;; - INDEX
1176;; -------------------------------------------------------------------------
1177
;; Create a linear series using INDEX, with operand 1 as the base and
;; operand 2 as the step.  The three alternatives are, in order:
;; immediate (Usi) base with register step, register base with immediate
;; (Usi) step, and register base with register step.
1178(define_insn "vec_series<mode>"
1179 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
1180 (vec_series:SVE_I
1181 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
1182 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
1183 "TARGET_SVE"
1184 "@
1185 index\t%0.<Vetype>, #%1, %<vw>2
43cacb12
RS
1186 index\t%0.<Vetype>, %<vw>1, #%2
1187 index\t%0.<Vetype>, %<vw>1, %<vw>2"
1188)
1189
1190;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
1191;; of an INDEX instruction.
1192(define_insn "*vec_series<mode>_plus"
1193 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1194 (plus:SVE_I
1195 (vec_duplicate:SVE_I
1196 (match_operand:<VEL> 1 "register_operand" "r"))
1197 (match_operand:SVE_I 2 "immediate_operand")))]
1198 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
1199 {
1200 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
1201 return "index\t%0.<Vetype>, %<vw>1, #%2";
1202 }
1203)
1204
915d28fe
RS
1205;; -------------------------------------------------------------------------
1206;; ---- [PRED] Duplicate element
1207;; -------------------------------------------------------------------------
1208;; The patterns in this section are synthetic.
1209;; -------------------------------------------------------------------------
1210
1211;; Implement a predicate broadcast by shifting the low bit of the scalar
1212;; input into the top bit and using a WHILELO. An alternative would be to
1213;; duplicate the input and do a compare with zero.
1214(define_expand "vec_duplicate<mode>"
1215 [(set (match_operand:PRED_ALL 0 "register_operand")
1216 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
9f4cbab8
RS
1217 "TARGET_SVE"
1218 {
915d28fe
RS
1219 rtx tmp = gen_reg_rtx (DImode);
1220 rtx op1 = gen_lowpart (DImode, operands[1]);
1221 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
1222 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
1223 DONE;
9f4cbab8
RS
1224 }
1225)
1226
915d28fe
RS
1227;; =========================================================================
1228;; == Vector decomposition
1229;; =========================================================================
9f4cbab8 1230
915d28fe
RS
1231;; -------------------------------------------------------------------------
1232;; ---- [INT,FP] Extract index
1233;; -------------------------------------------------------------------------
1234;; Includes:
1235;; - DUP (Advanced SIMD)
1236;; - DUP (SVE)
1237;; - EXT (SVE)
1238;; - ST1 (Advanced SIMD)
1239;; - UMOV (Advanced SIMD)
1240;; -------------------------------------------------------------------------
1241
;; Extract element operand 2 of SVE vector operand 1 into scalar operand 0.
;; Two cases are expanded here through extract_last_<mode> (LASTB): an
;; index known to be the last element of the vector, and any index that
;; is not a CONST_INT.  Every other constant index emits the vec_select
;; pattern as written.
1242(define_expand "vec_extract<mode><Vel>"
1243 [(set (match_operand:<VEL> 0 "register_operand")
1244 (vec_select:<VEL>
1245 (match_operand:SVE_ALL 1 "register_operand")
1246 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
9f4cbab8
RS
1247 "TARGET_SVE"
1248 {
915d28fe
RS
1249 poly_int64 val;
1250 if (poly_int_rtx_p (operands[2], &val)
1251 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
1252 {
1253 /* The last element can be extracted with a LASTB and a false
1254 predicate. */
1255 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
1256 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
1257 DONE;
1258 }
1259 if (!CONST_INT_P (operands[2]))
1260 {
1261 /* Create an index with operands[2] as the base and -1 as the step.
1262 It will then be zero for the element we care about. */
1263 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
1264 index = force_reg (<VEL_INT>mode, index);
1265 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
1266 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
1267
1268 /* Get a predicate that is true for only that element. */
1269 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
1270 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
1271 rtx sel = gen_reg_rtx (<VPRED>mode);
1272 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
1273
1274 /* Select the element using LASTB. */
1275 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
1276 DONE;
1277 }
9f4cbab8
RS
1278 }
1279)
1280
915d28fe
RS
1281;; Extract element zero. This is a special case because we want to force
1282;; the registers to be the same for the second alternative, and then
1283;; split the instruction into nothing after RA.
1284(define_insn_and_split "*vec_extract<mode><Vel>_0"
1285 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
1286 (vec_select:<VEL>
1287 (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
1288 (parallel [(const_int 0)])))]
9f4cbab8 1289 "TARGET_SVE"
915d28fe
RS
1290 {
1291 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
1292 switch (which_alternative)
1293 {
1294 case 0:
1295 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
1296 case 1:
1297 return "#";
1298 case 2:
1299 return "st1\\t{%1.<Vetype>}[0], %0";
1300 default:
1301 gcc_unreachable ();
1302 }
1303 }
1304 "&& reload_completed
1305 && REG_P (operands[0])
1306 && REGNO (operands[0]) == REGNO (operands[1])"
1307 [(const_int 0)]
1308 {
1309 emit_note (NOTE_INSN_DELETED);
1310 DONE;
1311 }
1312 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
9f4cbab8
RS
1313)
1314
915d28fe
RS
1315;; Extract an element from the Advanced SIMD portion of the register.
1316;; We don't just reuse the aarch64-simd.md pattern because we don't
1317;; want any change in lane number on big-endian targets.
1318(define_insn "*vec_extract<mode><Vel>_v128"
1319 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
1320 (vec_select:<VEL>
1321 (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
1322 (parallel [(match_operand:SI 2 "const_int_operand")])))]
1323 "TARGET_SVE
1324 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
43cacb12 1325 {
915d28fe
RS
1326 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
1327 switch (which_alternative)
1328 {
1329 case 0:
1330 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
1331 case 1:
1332 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
1333 case 2:
1334 return "st1\\t{%1.<Vetype>}[%2], %0";
1335 default:
1336 gcc_unreachable ();
1337 }
43cacb12 1338 }
915d28fe 1339 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
43cacb12
RS
1340)
1341
915d28fe
RS
1342;; Extract an element in the range of DUP. This pattern allows the
1343;; source and destination to be different.
1344(define_insn "*vec_extract<mode><Vel>_dup"
1345 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1346 (vec_select:<VEL>
1347 (match_operand:SVE_ALL 1 "register_operand" "w")
1348 (parallel [(match_operand:SI 2 "const_int_operand")])))]
1349 "TARGET_SVE
1350 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
1351 {
1352 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
1353 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
1354 }
43cacb12
RS
1355)
1356
915d28fe
RS
1357;; Extract an element outside the range of DUP. The first alternative ties
1358;; the source and destination; the second lets them differ by using MOVPRFX.
1359(define_insn "*vec_extract<mode><Vel>_ext"
06b3ba23 1360 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
915d28fe 1361 (vec_select:<VEL>
06b3ba23 1362 (match_operand:SVE_ALL 1 "register_operand" "0, w")
915d28fe
RS
1363 (parallel [(match_operand:SI 2 "const_int_operand")])))]
1364 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
1365 {
1366 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
1367 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
06b3ba23
RS
1368 return (which_alternative == 0
1369 ? "ext\t%0.b, %0.b, %0.b, #%2"
1370 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
915d28fe 1371 }
06b3ba23 1372 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
1373)
1374
915d28fe
RS
1375;; -------------------------------------------------------------------------
1376;; ---- [INT,FP] Extract active element
1377;; -------------------------------------------------------------------------
1378;; Includes:
1379;; - LASTB
1380;; -------------------------------------------------------------------------
1381
1382;; Extract the last active element of operand 1 into operand 0.
1383;; If no elements are active, extract the last inactive element instead.
1384(define_insn "extract_last_<mode>"
1385 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
1386 (unspec:<VEL>
1387 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1388 (match_operand:SVE_ALL 2 "register_operand" "w, w")]
1389 UNSPEC_LASTB))]
43cacb12 1390 "TARGET_SVE"
915d28fe
RS
1391 "@
1392 lastb\t%<vwcore>0, %1, %2.<Vetype>
1393 lastb\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
1394)
1395
915d28fe
RS
1396;; -------------------------------------------------------------------------
1397;; ---- [PRED] Extract index
1398;; -------------------------------------------------------------------------
1399;; The patterns in this section are synthetic.
1400;; -------------------------------------------------------------------------
1401
1402;; Handle extractions from a predicate by converting to an integer vector
1403;; and extracting from there.
1404(define_expand "vec_extract<vpred><Vel>"
1405 [(match_operand:<VEL> 0 "register_operand")
1406 (match_operand:<VPRED> 1 "register_operand")
1407 (match_operand:SI 2 "nonmemory_operand")
1408 ;; Dummy operand to which we can attach the iterator.
1409 (reg:SVE_I V0_REGNUM)]
43cacb12 1410 "TARGET_SVE"
915d28fe
RS
1411 {
1412 rtx tmp = gen_reg_rtx (<MODE>mode);
d29f7dd5
RS
1413 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
1414 CONST1_RTX (<MODE>mode),
1415 CONST0_RTX (<MODE>mode)));
915d28fe
RS
1416 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
1417 DONE;
1418 }
43cacb12
RS
1419)
1420
915d28fe
RS
1421;; =========================================================================
1422;; == Unary arithmetic
1423;; =========================================================================
1424
1425;; -------------------------------------------------------------------------
1426;; ---- [INT] General unary arithmetic corresponding to rtx codes
1427;; -------------------------------------------------------------------------
1428;; Includes:
1429;; - ABS
bca5a997
RS
1430;; - CLS (= clrsb)
1431;; - CLZ
915d28fe
RS
1432;; - CNT (= popcount)
1433;; - NEG
1434;; - NOT
1435;; -------------------------------------------------------------------------
1436
1437;; Unpredicated integer unary arithmetic.
1438(define_expand "<optab><mode>2"
1439 [(set (match_operand:SVE_I 0 "register_operand")
1440 (unspec:SVE_I
1441 [(match_dup 2)
1442 (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
06308276 1443 UNSPEC_PRED_X))]
43cacb12 1444 "TARGET_SVE"
915d28fe
RS
1445 {
/* Operand 2 is a match_dup rather than a caller-supplied operand; fill
   it with the <VPRED>mode register returned by aarch64_ptrue_reg so
   that the result matches the predicated UNSPEC_PRED_X form.  */
1446 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1447 }
43cacb12
RS
1448)
1449
915d28fe
RS
1450;; Integer unary arithmetic predicated with a PTRUE.
1451(define_insn "*<optab><mode>2"
1452 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1453 (unspec:SVE_I
1454 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1455 (SVE_INT_UNARY:SVE_I
1456 (match_operand:SVE_I 2 "register_operand" "w"))]
06308276 1457 UNSPEC_PRED_X))]
43cacb12 1458 "TARGET_SVE"
915d28fe 1459 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
43cacb12
RS
1460)
1461
3c9f4963
RS
1462;; Predicated integer unary arithmetic, merging with the first input.
1463(define_insn "*cond_<optab><mode>_2"
1464 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1465 (unspec:SVE_I
1466 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1467 (SVE_INT_UNARY:SVE_I
1468 (match_operand:SVE_I 2 "register_operand" "0, w"))
1469 (match_dup 2)]
1470 UNSPEC_SEL))]
1471 "TARGET_SVE"
1472 "@
1473 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
1474 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1475 [(set_attr "movprfx" "*,yes")]
1476)
1477
1478;; Predicated integer unary arithmetic, merging with an independent value.
1479;;
1480;; The earlyclobber isn't needed for the first alternative, but omitting
1481;; it would only help the case in which operands 2 and 3 are the same,
1482;; which is handled above rather than here. Marking all the alternatives
1483;; as earlyclobber helps to make the instruction more regular to the
1484;; register allocator.
1485(define_insn "*cond_<optab><mode>_any"
1486 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
1487 (unspec:SVE_I
1488 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1489 (SVE_INT_UNARY:SVE_I
1490 (match_operand:SVE_I 2 "register_operand" "w, w, w"))
1491 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
1492 UNSPEC_SEL))]
1493 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
1494 "@
1495 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
1496 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
1497 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1498 [(set_attr "movprfx" "*,yes,yes")]
1499)
1500
d7a09c44
RS
1501;; -------------------------------------------------------------------------
1502;; ---- [INT] General unary arithmetic corresponding to unspecs
1503;; -------------------------------------------------------------------------
1504;; Includes:
1505;; - REVB
1506;; - REVH
1507;; - REVW
1508;; -------------------------------------------------------------------------
1509
1510;; Predicated integer unary operations.
1511(define_insn "@aarch64_pred_<optab><mode>"
1512 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1513 (unspec:SVE_I
1514 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1515 (unspec:SVE_I
1516 [(match_operand:SVE_I 2 "register_operand" "w")]
1517 SVE_INT_UNARY)]
1518 UNSPEC_PRED_X))]
1519 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
1520 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1521)
1522
d113ece6
RS
1523;; -------------------------------------------------------------------------
1524;; ---- [INT] Zero extension
1525;; -------------------------------------------------------------------------
1526;; Includes:
1527;; - UXTB
1528;; - UXTH
1529;; - UXTW
1530;; -------------------------------------------------------------------------
1531
1532;; Match UXT[BHW] as a conditional AND of a constant, merging with the
1533;; first input.
1534(define_insn "*cond_uxt<mode>_2"
1535 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1536 (unspec:SVE_I
1537 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1538 (and:SVE_I
1539 (match_operand:SVE_I 2 "register_operand" "0, w")
1540 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
1541 (match_dup 2)]
1542 UNSPEC_SEL))]
1543 "TARGET_SVE"
1544 "@
1545 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
1546 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
1547 [(set_attr "movprfx" "*,yes")]
1548)
1549
1550;; Match UXT[BHW] as a conditional AND of a constant, merging with an
1551;; independent value.
1552;;
1553;; The earlyclobber isn't needed for the first alternative, but omitting
1554;; it would only help the case in which operands 2 and 4 are the same,
1555;; which is handled above rather than here. Marking all the alternatives
1556;; as earlyclobber helps to make the instruction more regular to the
1557;; register allocator.
1558(define_insn "*cond_uxt<mode>_any"
1559 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
1560 (unspec:SVE_I
1561 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1562 (and:SVE_I
1563 (match_operand:SVE_I 2 "register_operand" "w, w, w")
1564 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
1565 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
1566 UNSPEC_SEL))]
1567 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
1568 "@
1569 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
1570 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
1571 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
1572 [(set_attr "movprfx" "*,yes,yes")]
1573)
1574
e0a0be93
RS
1575;; -------------------------------------------------------------------------
1576;; ---- [INT] Logical inverse
1577;; -------------------------------------------------------------------------
1578
1579;; Predicated logical inverse.
1580(define_insn "*cnot<mode>"
1581 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1582 (unspec:SVE_I
1583 [(unspec:<VPRED>
1584 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1585 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
1586 (eq:<VPRED>
1587 (match_operand:SVE_I 2 "register_operand" "w")
1588 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
1589 UNSPEC_PRED_Z)
1590 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
1591 (match_dup 3)]
1592 UNSPEC_SEL))]
1593 "TARGET_SVE"
1594 "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
1595)
1596
1597;; Predicated logical inverse, merging with the first input.
;; Operand 1 selects between the inner logical inverse of operand 2 and
;; operand 2 itself (match_dup 2).  Operand 5 is the predicate of the
;; inner comparison; it is matched without a mode, predicate or
;; constraint, and the rewrite step replaces it with
;; CONSTM1_RTX (<VPRED>mode) whenever it is not already a constant.
;; The second alternative handles an untied operand 2 by copying it to
;; the output with MOVPRFX first.
1598(define_insn_and_rewrite "*cond_cnot<mode>_2"
1599 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1600 (unspec:SVE_I
1601 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1602 ;; Logical inverse of operand 2 (as above).
1603 (unspec:SVE_I
1604 [(unspec:<VPRED>
1605 [(match_operand 5)
1606 (const_int SVE_KNOWN_PTRUE)
1607 (eq:<VPRED>
1608 (match_operand:SVE_I 2 "register_operand" "0, w")
1609 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
1610 UNSPEC_PRED_Z)
1611 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
1612 (match_dup 3)]
1613 UNSPEC_SEL)
1614 (match_dup 2)]
1615 UNSPEC_SEL))]
1616 "TARGET_SVE"
1617 "@
1618 cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
1619 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
1620 "&& !CONSTANT_P (operands[5])"
1621 {
1622 operands[5] = CONSTM1_RTX (<VPRED>mode);
1623 }
1624 [(set_attr "movprfx" "*,yes")]
1625)
1626
1627;; Predicated logical inverse, merging with an independent value.
1628;;
1629;; The earlyclobber isn't needed for the first alternative, but omitting
1630;; it would only help the case in which operands 2 and 6 are the same,
1631;; which is handled above rather than here. Marking all the alternatives
1632;; as earlyclobber helps to make the instruction more regular to the
1633;; register allocator.
;; Operand 1 selects between the inner logical inverse of operand 2 and
;; the independent fallback value in operand 6.  The alternatives cover
;; operand 6 tied to the output ("0"), operand 6 zero ("Dz", using a
;; %1/z MOVPRFX of operand 2) and operand 6 in another register ("w",
;; using an unpredicated MOVPRFX from operand 6).  As in the _2 pattern,
;; the rewrite step replaces a non-constant operand 5 with
;; CONSTM1_RTX (<VPRED>mode).
1634(define_insn_and_rewrite "*cond_cnot<mode>_any"
1635 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
1636 (unspec:SVE_I
1637 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1638 ;; Logical inverse of operand 2 (as above).
1639 (unspec:SVE_I
1640 [(unspec:<VPRED>
1641 [(match_operand 5)
1642 (const_int SVE_KNOWN_PTRUE)
1643 (eq:<VPRED>
1644 (match_operand:SVE_I 2 "register_operand" "w, w, w")
1645 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
1646 UNSPEC_PRED_Z)
1647 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
1648 (match_dup 3)]
1649 UNSPEC_SEL)
1650 (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
1651 UNSPEC_SEL))]
1652 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
1653 "@
1654 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
1655 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
1656 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
1657 "&& !CONSTANT_P (operands[5])"
1658 {
1659 operands[5] = CONSTM1_RTX (<VPRED>mode);
1660 }
1661 [(set_attr "movprfx" "*,yes,yes")]
1662)
1663
915d28fe 1664;; -------------------------------------------------------------------------
d45b20a5 1665;; ---- [FP] General unary arithmetic corresponding to unspecs
915d28fe
RS
1666;; -------------------------------------------------------------------------
1667;; Includes:
1668;; - FABS
1669;; - FNEG
915d28fe
RS
1670;; - FRINTA
1671;; - FRINTI
1672;; - FRINTM
1673;; - FRINTN
1674;; - FRINTP
1675;; - FRINTX
1676;; - FRINTZ
d45b20a5 1677;; - FSQRT
915d28fe
RS
1678;; -------------------------------------------------------------------------
1679
d45b20a5
RS
1680;; Unpredicated floating-point unary operations.
;; Expands to the predicated SVE_COND_FP_UNARY unspec.  The predicate
;; (operand 2, referenced through match_dup) is filled in by the
;; preparation code with aarch64_ptrue_reg (<VPRED>mode), and the
;; strictness field is fixed to SVE_RELAXED_GP.
1681(define_expand "<optab><mode>2"
915d28fe
RS
1682 [(set (match_operand:SVE_F 0 "register_operand")
1683 (unspec:SVE_F
1684 [(match_dup 2)
c9c5a809 1685 (const_int SVE_RELAXED_GP)
d45b20a5
RS
1686 (match_operand:SVE_F 1 "register_operand")]
1687 SVE_COND_FP_UNARY))]
915d28fe
RS
1688 "TARGET_SVE"
1689 {
1690 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1691 }
1692)
1693
d45b20a5
RS
1694;; Predicated floating-point unary operations.
;; Operand 1 is the governing predicate, operand 3 the
;; aarch64_sve_gp_strictness flag and operand 2 the input vector.
;; A single instruction is emitted, predicated with %1/m; operand 3 is
;; matched but not printed.
1695(define_insn "*<optab><mode>2"
915d28fe
RS
1696 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1697 (unspec:SVE_F
1698 [(match_operand:<VPRED> 1 "register_operand" "Upl")
c9c5a809 1699 (match_operand:SI 3 "aarch64_sve_gp_strictness")
d45b20a5
RS
1700 (match_operand:SVE_F 2 "register_operand" "w")]
1701 SVE_COND_FP_UNARY))]
915d28fe 1702 "TARGET_SVE"
d45b20a5 1703 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
915d28fe
RS
1704)
1705
b21f7d53
RS
1706;; Predicated floating-point unary arithmetic, merging with the first input.
;; Operand 1 selects between the inner SVE_COND_FP_UNARY operation on
;; operand 2 and operand 2 itself (match_dup 2).  Operand 3 is the
;; predicate of the inner operation and operand 4 its strictness flag.
;; The insn only matches when
;; aarch64_sve_pred_dominates_p (&operands[3], operands[1]) holds; the
;; rewrite step then replaces operand 3 with a copy of operand 1 if the
;; two are not already equal.  The second alternative copies an untied
;; operand 2 to the output with MOVPRFX first.
1707(define_insn_and_rewrite "*cond_<optab><mode>_2"
1708 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1709 (unspec:SVE_F
1710 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1711 (unspec:SVE_F
1712 [(match_operand 3)
1713 (match_operand:SI 4 "aarch64_sve_gp_strictness")
1714 (match_operand:SVE_F 2 "register_operand" "0, w")]
1715 SVE_COND_FP_UNARY)
1716 (match_dup 2)]
1717 UNSPEC_SEL))]
1718 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
1719 "@
1720 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
1721 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1722 "&& !rtx_equal_p (operands[1], operands[3])"
1723 {
1724 operands[3] = copy_rtx (operands[1]);
1725 }
1726 [(set_attr "movprfx" "*,yes")]
1727)
1728
1729;; Predicated floating-point unary arithmetic, merging with an independent
1730;; value.
1731;;
1732;; The earlyclobber isn't needed for the first alternative, but omitting
1733;; it would only help the case in which operands 2 and 3 are the same,
1734;; which is handled above rather than here. Marking all the alternatives
1735;; as earlyclobber helps to make the instruction more regular to the
1736;; register allocator.
;; Operand 1 selects between the inner SVE_COND_FP_UNARY operation on
;; operand 2 and the fallback value in operand 3.  Operand 4 is the
;; predicate of the inner operation and operand 5 its strictness flag.
;; The alternatives cover operand 3 tied to the output ("0"), operand 3
;; zero ("Dz", using a %1/z MOVPRFX of operand 2) and operand 3 in
;; another register ("w", using an unpredicated MOVPRFX from operand 3).
;; The rewrite step replaces operand 4 with a copy of operand 1 when the
;; two differ.
1737(define_insn_and_rewrite "*cond_<optab><mode>_any"
1738 [(set (match_operand:SVE_F 0 "register_operand" "=&w, ?&w, ?&w")
1739 (unspec:SVE_F
1740 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1741 (unspec:SVE_F
1742 [(match_operand 4)
1743 (match_operand:SI 5 "aarch64_sve_gp_strictness")
1744 (match_operand:SVE_F 2 "register_operand" "w, w, w")]
1745 SVE_COND_FP_UNARY)
1746 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
1747 UNSPEC_SEL))]
1748 "TARGET_SVE
1749 && !rtx_equal_p (operands[2], operands[3])
1750 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
1751 "@
1752 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
1753 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
1754 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1755 "&& !rtx_equal_p (operands[1], operands[4])"
1756 {
1757 operands[4] = copy_rtx (operands[1]);
1758 }
1759 [(set_attr "movprfx" "*,yes,yes")]
1760)
1761
915d28fe
RS
1762;; -------------------------------------------------------------------------
1763;; ---- [PRED] Inverse
1764;; -------------------------------------------------------------------------
1765;; Includes:
1766;; - NOT
1767;; -------------------------------------------------------------------------
1768
1769;; Unpredicated predicate inverse.
;; Expands to (and (not operand 1) operand 2), where operand 2 is set by
;; the preparation code to aarch64_ptrue_reg (<MODE>mode).
1770(define_expand "one_cmpl<mode>2"
1771 [(set (match_operand:PRED_ALL 0 "register_operand")
1772 (and:PRED_ALL
1773 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
1774 (match_dup 2)))]
1775 "TARGET_SVE"
1776 {
1777 operands[2] = aarch64_ptrue_reg (<MODE>mode);
1778 }
1779)
1780
1781;; Predicated predicate inverse.
;; Matches (and (not operand 2) operand 1): operand 2 is the predicate
;; being inverted and operand 1 is printed as the governing predicate
;; (%1/z).  The template always uses the .b element suffix, whatever
;; the PRED_ALL mode is.
1782(define_insn "*one_cmpl<mode>3"
1783 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1784 (and:PRED_ALL
1785 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1786 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1787 "TARGET_SVE"
1788 "not\t%0.b, %1/z, %2.b"
1789)
1790
1791;; =========================================================================
1792;; == Binary arithmetic
1793;; =========================================================================
1794
1795;; -------------------------------------------------------------------------
1796;; ---- [INT] General binary arithmetic corresponding to rtx codes
1797;; -------------------------------------------------------------------------
f8c22a8b
RS
1798;; Includes:
1799;; - ADD (merging form only)
1800;; - AND (merging form only)
20103c0e 1801;; - ASR (merging form only)
f8c22a8b 1802;; - EOR (merging form only)
20103c0e
RS
1803;; - LSL (merging form only)
1804;; - LSR (merging form only)
915d28fe 1805;; - MUL
f8c22a8b 1806;; - ORR (merging form only)
915d28fe
RS
1807;; - SMAX
1808;; - SMIN
f8c22a8b 1809;; - SUB (merging form only)
915d28fe
RS
1810;; - UMAX
1811;; - UMIN
1812;; -------------------------------------------------------------------------
1813
f8c22a8b
RS
1814;; Unpredicated integer binary operations that have an immediate form.
;; Wraps the SVE_INT_BINARY_IMM operation on operands 1 and 2 in an
;; UNSPEC_PRED_X whose predicate (operand 3, via match_dup) is set by
;; the preparation code to aarch64_ptrue_reg (<VPRED>mode).  Operand 2
;; may be anything accepted by aarch64_sve_<sve_imm_con>_operand.
1815(define_expand "<optab><mode>3"
1816 [(set (match_operand:SVE_I 0 "register_operand")
1817 (unspec:SVE_I
1818 [(match_dup 3)
1819 (SVE_INT_BINARY_IMM:SVE_I
1820 (match_operand:SVE_I 1 "register_operand")
1821 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
1822 UNSPEC_PRED_X))]
1823 "TARGET_SVE"
1824 {
1825 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1826 }
1827)
1828
1829;; Integer binary operations that have an immediate form, predicated
1830;; with a PTRUE. We don't actually need the predicate for the first
1831;; and third alternatives, but using Upa or X isn't likely to gain much
1832;; and would make the instruction seem less uniform to the register
1833;; allocator.
;; Alternatives, in order:
;;   1. operand 2 tied to the output, operand 3 an immediate: "#".
;;   2. operand 2 tied to the output, operand 3 a register: one
;;      predicated instruction.
;;   3. operand 2 untied, operand 3 an immediate: "#".
;;   4. operand 2 untied, operand 3 a register: MOVPRFX from operand 2
;;      followed by the predicated instruction.
;; The "#" alternatives are split once reload has completed and
;; operand 3 is not a register; the replacement is the same operation
;; without the UNSPEC_PRED_X wrapper or the predicate.
1834(define_insn_and_split "*<optab><mode>3"
1835 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
1836 (unspec:SVE_I
1837 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
1838 (SVE_INT_BINARY_IMM:SVE_I
1839 (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
1840 (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
1841 UNSPEC_PRED_X))]
1842 "TARGET_SVE"
1843 "@
1844 #
1845 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1846 #
1847 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1848 ; Split the unpredicated form after reload, so that we don't have
1849 ; the unnecessary PTRUE.
1850 "&& reload_completed
1851 && !register_operand (operands[3], <MODE>mode)"
1852 [(set (match_dup 0) (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
1853 ""
1854 [(set_attr "movprfx" "*,*,yes,yes")]
1855)
1856
1857;; Unpredicated binary operations with a constant (post-RA only).
1858;; These are generated by splitting a predicated instruction whose
1859;; predicate is unused.
;; The insn condition requires reload_completed, so this pattern cannot
;; match before register allocation.  Operand 2 is restricted only by
;; the aarch64_sve_<sve_imm_con>_immediate predicate (it has no
;; constraint string).  The second alternative copies an untied
;; operand 1 to the output with MOVPRFX before applying the immediate.
1860(define_insn "*post_ra_<optab><mode>3"
1861 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1862 (SVE_INT_BINARY_IMM:SVE_I
1863 (match_operand:SVE_I 1 "register_operand" "0, w")
1864 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
1865 "TARGET_SVE && reload_completed"
1866 "@
1867 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
1868 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
1869 [(set_attr "movprfx" "*,yes")]
1870)
1871
915d28fe 1872;; Predicated integer operations with merging.
;; This expander has no preparation code: it emits the UNSPEC_SEL
;; pattern as written, in which operand 1 selects between the
;; SVE_INT_BINARY operation on operands 2 and 3 and the fallback value
;; in operand 4 (a register or zero).
b6c3aea1 1873(define_expand "@cond_<optab><mode>"
915d28fe
RS
1874 [(set (match_operand:SVE_I 0 "register_operand")
1875 (unspec:SVE_I
1876 [(match_operand:<VPRED> 1 "register_operand")
1877 (SVE_INT_BINARY:SVE_I
1878 (match_operand:SVE_I 2 "register_operand")
d113ece6 1879 (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
915d28fe
RS
1880 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
1881 UNSPEC_SEL))]
1882 "TARGET_SVE"
1883)
1884
1885;; Predicated integer operations, merging with the first input.
;; The fallback value is operand 2 itself (match_dup 2).  When operand 2
;; is tied to the output a single %1/m instruction is enough; otherwise
;; operand 2 is first copied to the output with MOVPRFX.
1886(define_insn "*cond_<optab><mode>_2"
1887 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1888 (unspec:SVE_I
1889 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1890 (SVE_INT_BINARY:SVE_I
1891 (match_operand:SVE_I 2 "register_operand" "0, w")
1892 (match_operand:SVE_I 3 "register_operand" "w, w"))
1893 (match_dup 2)]
1894 UNSPEC_SEL))]
1895 "TARGET_SVE"
1896 "@
1897 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1898 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1899 [(set_attr "movprfx" "*,yes")]
1900)
1901
1902;; Predicated integer operations, merging with the second input.
;; The fallback value is operand 3 (match_dup 3), so the output is tied
;; to (or prefixed from) operand 3 and the <sve_int_op_rev> mnemonic is
;; used with operand 2 as the other source.
1903(define_insn "*cond_<optab><mode>_3"
1904 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1905 (unspec:SVE_I
1906 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1907 (SVE_INT_BINARY:SVE_I
1908 (match_operand:SVE_I 2 "register_operand" "w, w")
1909 (match_operand:SVE_I 3 "register_operand" "0, w"))
1910 (match_dup 3)]
1911 UNSPEC_SEL))]
1912 "TARGET_SVE"
1913 "@
1914 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1915 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1916 [(set_attr "movprfx" "*,yes")]
1917)
1918
1919;; Predicated integer operations, merging with an independent value.
;; Alternatives, in order:
;;   1. operand 2 tied to the output, fallback zero: %1/z MOVPRFX of the
;;      output onto itself, then the operation with operand 3.
;;   2. operand 3 tied to the output, fallback zero: as above, but using
;;      <sve_int_op_rev> with operand 2 as the other source.
;;   3. neither input tied, fallback zero: %1/z MOVPRFX from operand 2.
;;   4. fallback (operand 4) tied to the output: %1/m MOVPRFX from
;;      operand 2.
;;   5. fallback in an unrelated register: "#".
;; The rewrite step applies after reload when operand 4 is a register
;; different from operand 0.  It emits
;; gen_vcond_mask_<mode><vpred> (operands[0], operands[2], operands[4],
;; operands[1]) and then makes operands 2 and 4 both refer to operand 0.
;; The insn condition rejects a fallback that is the same rtx as either
;; input.
1920(define_insn_and_rewrite "*cond_<optab><mode>_any"
1921 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1922 (unspec:SVE_I
1923 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1924 (SVE_INT_BINARY:SVE_I
1925 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
1926 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
1927 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1928 UNSPEC_SEL))]
43cacb12 1929 "TARGET_SVE
915d28fe
RS
1930 && !rtx_equal_p (operands[2], operands[4])
1931 && !rtx_equal_p (operands[3], operands[4])"
1932 "@
1933 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1934 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1935 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1936 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1937 #"
1938 "&& reload_completed
1939 && register_operand (operands[4], <MODE>mode)
1940 && !rtx_equal_p (operands[0], operands[4])"
43cacb12 1941 {
915d28fe
RS
1942 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1943 operands[4], operands[1]));
1944 operands[4] = operands[2] = operands[0];
43cacb12 1945 }
915d28fe 1946 [(set_attr "movprfx" "yes")]
43cacb12
RS
1947)
1948
915d28fe
RS
1949;; -------------------------------------------------------------------------
1950;; ---- [INT] Addition
1951;; -------------------------------------------------------------------------
1952;; Includes:
1953;; - ADD
1954;; - DECB
1955;; - DECD
1956;; - DECH
1957;; - DECW
1958;; - INCB
1959;; - INCD
1960;; - INCH
1961;; - INCW
1962;; - SUB
1963;; -------------------------------------------------------------------------
1964
;; Unpredicated integer addition.  Alternatives, selected by the
;; constraint on operand 2:
;;   1. "vsa", operand 1 tied: ADD with immediate %D2.
;;   2. "vsn", operand 1 tied: SUB with immediate %N2.
;;   3. "vsi", operand 1 tied: text produced by
;;      aarch64_output_sve_vector_inc_dec.
;;   4. "vsa", operand 1 untied: MOVPRFX from operand 1, then ADD.
;;   5. "vsn", operand 1 untied: MOVPRFX from operand 1, then SUB.
;;   6. "w": three-register ADD.
43cacb12 1965(define_insn "add<mode>3"
5e176a61 1966 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
43cacb12 1967 (plus:SVE_I
5e176a61
RS
1968 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
1969 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
43cacb12
RS
1970 "TARGET_SVE"
1971 "@
1972 add\t%0.<Vetype>, %0.<Vetype>, #%D2
1973 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
0fdc30bc 1974 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
5e176a61
RS
1975 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
1976 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
43cacb12 1977 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5e176a61 1978 [(set_attr "movprfx" "*,*,*,yes,yes,*")]
43cacb12
RS
1979)
1980
915d28fe
RS
1981;; Merging forms are handled through SVE_INT_BINARY.
1982
1983;; -------------------------------------------------------------------------
1984;; ---- [INT] Subtraction
1985;; -------------------------------------------------------------------------
1986;; Includes:
1987;; - SUB
1988;; - SUBR
1989;; -------------------------------------------------------------------------
1990
;; Unpredicated integer subtraction, operand 1 minus operand 2.
;; Operand 1 may be a register or a "vsa" immediate:
;;   1. both operands in registers: three-register SUB.
;;   2. operand 1 an immediate, operand 2 tied to the output: SUBR of
;;      the output with immediate %D1.
;;   3. operand 1 an immediate, operand 2 untied: MOVPRFX from
;;      operand 2, then the same SUBR.
43cacb12 1991(define_insn "sub<mode>3"
5e176a61 1992 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
43cacb12 1993 (minus:SVE_I
5e176a61
RS
1994 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
1995 (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
43cacb12
RS
1996 "TARGET_SVE"
1997 "@
1998 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
5e176a61
RS
1999 subr\t%0.<Vetype>, %0.<Vetype>, #%D1
2000 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
2001 [(set_attr "movprfx" "*,*,yes")]
43cacb12
RS
2002)
2003
915d28fe
RS
2004;; Merging forms are handled through SVE_INT_BINARY.
2005
a229966c
RS
2006;; -------------------------------------------------------------------------
2007;; ---- [INT] Take address
2008;; -------------------------------------------------------------------------
2009;; Includes:
2010;; - ADR
2011;; -------------------------------------------------------------------------
2012
2013;; Unshifted ADR, with the offset being zero-extended from the low 32 bits.
;; Matches operand 1 plus (operand 2 AND operand 3), where operand 3
;; must satisfy aarch64_sve_uxtw_immediate.  Operand 1 is printed as
;; the base and operand 2 as the "uxtw" offset; operand 3 is not
;; printed.
2014(define_insn "*aarch64_adr_uxtw"
2015 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
2016 (plus:VNx2DI
2017 (and:VNx2DI
2018 (match_operand:VNx2DI 2 "register_operand" "w")
2019 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
2020 (match_operand:VNx2DI 1 "register_operand" "w")))]
2021 "TARGET_SVE"
2022 "adr\t%0.d, [%1.d, %2.d, uxtw]"
2023)
2024
2025;; ADR with a nonzero shift.
;; Matches operand 1 plus an UNSPEC_PRED_X-wrapped left shift of
;; operand 2 by operand 3 (a const_1_to_3_operand, printed after
;; "lsl").  Operand 4 is the predicate of the wrapped shift; it is not
;; printed, and the rewrite step replaces it with
;; CONSTM1_RTX (<VPRED>mode) when it is not already a constant.
2026(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
2027 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2028 (plus:SVE_SDI
2029 (unspec:SVE_SDI
2030 [(match_operand 4)
2031 (ashift:SVE_SDI
2032 (match_operand:SVE_SDI 2 "register_operand" "w")
2033 (match_operand:SVE_SDI 3 "const_1_to_3_operand"))]
2034 UNSPEC_PRED_X)
2035 (match_operand:SVE_SDI 1 "register_operand" "w")))]
2036 "TARGET_SVE"
2037 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
2038 "&& !CONSTANT_P (operands[4])"
2039 {
2040 operands[4] = CONSTM1_RTX (<VPRED>mode);
2041 }
2042)
2043
2044;; Same, but with the index being zero-extended from the low 32 bits.
;; Here the shifted value is (operand 2 AND operand 4), with operand 4
;; required to satisfy aarch64_sve_uxtw_immediate, and the shift amount
;; (operand 3) is printed after "uxtw".  Operand 5 is the predicate of
;; the UNSPEC_PRED_X wrapper; the rewrite step replaces it with
;; CONSTM1_RTX (VNx2BImode) when it is not already a constant.
2045(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
2046 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
2047 (plus:VNx2DI
2048 (unspec:VNx2DI
2049 [(match_operand 5)
2050 (ashift:VNx2DI
2051 (and:VNx2DI
2052 (match_operand:VNx2DI 2 "register_operand" "w")
2053 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
2054 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
2055 UNSPEC_PRED_X)
2056 (match_operand:VNx2DI 1 "register_operand" "w")))]
2057 "TARGET_SVE"
2058 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
2059 "&& !CONSTANT_P (operands[5])"
2060 {
2061 operands[5] = CONSTM1_RTX (VNx2BImode);
2062 }
2063)
2064
915d28fe
RS
2065;; -------------------------------------------------------------------------
2066;; ---- [INT] Absolute difference
2067;; -------------------------------------------------------------------------
2068;; Includes:
2069;; - SABD
2070;; - UABD
2071;; -------------------------------------------------------------------------
2072
2073;; Unpredicated integer absolute difference.
;; The preparation code builds a predicate with
;; aarch64_ptrue_reg (<VPRED>mode), emits the predicated
;; aarch64_<su>abd<mode>_3 insn on operands 0, 1 and 2 and finishes
;; with DONE.
;; NOTE(review): the RTL template (a "use" plus a bare USMAX) presumably
;; only serves to declare the operands and the USMAX iterator -- confirm.
2074(define_expand "<su>abd<mode>_3"
2075 [(use (match_operand:SVE_I 0 "register_operand"))
2076 (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
2077 (match_operand:SVE_I 2 "register_operand"))]
2078 "TARGET_SVE"
2079 {
2080 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
2081 emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
2082 operands[2]));
2083 DONE;
2084 }
2085)
2086
2087;; Predicated integer absolute difference.
;; The absolute difference is expressed in RTL as
;; (USMAX op2 op3) - (<max_opp> op2 op3), wrapped in UNSPEC_PRED_X with
;; operand 1 as the predicate.  The first alternative ties operand 2 to
;; the output; the second copies operand 2 to the output with MOVPRFX
;; first.
2088(define_insn "aarch64_<su>abd<mode>_3"
2089 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2090 (unspec:SVE_I
2091 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2092 (minus:SVE_I
2093 (USMAX:SVE_I
9a8d9b3f 2094 (match_operand:SVE_I 2 "register_operand" "%0, w")
915d28fe
RS
2095 (match_operand:SVE_I 3 "register_operand" "w, w"))
2096 (<max_opp>:SVE_I
2097 (match_dup 2)
2098 (match_dup 3)))]
06308276 2099 UNSPEC_PRED_X))]
915d28fe
RS
2100 "TARGET_SVE"
2101 "@
2102 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2103 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2104 [(set_attr "movprfx" "*,yes")]
2105)
2106
9730c5cc
RS
2107;; Predicated integer absolute difference, merging with the first input.
;; Operand 1 selects between the absolute difference of operands 2 and 3
;; and operand 2 itself (match_dup 2).  The USMAX and <max_opp> halves of
;; the difference each sit in their own UNSPEC_PRED_X, with predicates
;; operand 4 and operand 5 respectively.  The rewrite step sets both to
;; CONSTM1_RTX (<VPRED>mode) whenever at least one of them is not a
;; constant.
2108(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
2109 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2110 (unspec:SVE_I
2111 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2112 (minus:SVE_I
2113 (unspec:SVE_I
2114 [(match_operand 4)
2115 (USMAX:SVE_I
2116 (match_operand:SVE_I 2 "register_operand" "0, w")
2117 (match_operand:SVE_I 3 "register_operand" "w, w"))]
2118 UNSPEC_PRED_X)
2119 (unspec:SVE_I
2120 [(match_operand 5)
2121 (<max_opp>:SVE_I
2122 (match_dup 2)
2123 (match_dup 3))]
2124 UNSPEC_PRED_X))
2125 (match_dup 2)]
2126 UNSPEC_SEL))]
2127 "TARGET_SVE"
2128 "@
2129 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2130 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2131 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
2132 {
2133 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
2134 }
2135 [(set_attr "movprfx" "*,yes")]
2136)
2137
2138;; Predicated integer absolute difference, merging with an independent value.
;; Operand 1 selects between the absolute difference of operands 2 and 3
;; and the fallback value in operand 4.  Operands 5 and 6 are the
;; predicates of the two inner UNSPEC_PRED_X wrappers.  The alternatives
;; are: operand 2 tied (fallback zero), operand 3 tied (fallback zero,
;; with operand 2 as the other source), neither tied (fallback zero),
;; fallback tied to the output, and fallback in an unrelated register
;; ("#").
;;
;; The rewrite condition is "&& 1", so the C code decides what to do:
;;   - if operand 5 or operand 6 is not a constant, set both to
;;     CONSTM1_RTX (<VPRED>mode);
;;   - otherwise, after reload and when operand 4 is a register
;;     different from operand 0, emit gen_vcond_mask_<mode><vpred>
;;     (operands[0], operands[2], operands[4], operands[1]) and make
;;     operands 2 and 4 both refer to operand 0;
;;   - otherwise FAIL.
2139(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
2140 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
2141 (unspec:SVE_I
2142 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2143 (minus:SVE_I
2144 (unspec:SVE_I
2145 [(match_operand 5)
2146 (USMAX:SVE_I
2147 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
2148 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))]
2149 UNSPEC_PRED_X)
2150 (unspec:SVE_I
2151 [(match_operand 6)
2152 (<max_opp>:SVE_I
2153 (match_dup 2)
2154 (match_dup 3))]
2155 UNSPEC_PRED_X))
2156 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
2157 UNSPEC_SEL))]
2158 "TARGET_SVE
2159 && !rtx_equal_p (operands[2], operands[4])
2160 && !rtx_equal_p (operands[3], operands[4])"
2161 "@
2162 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2163 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2164 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2165 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2166 #"
2167 "&& 1"
2168 {
2169 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
2170 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
2171 else if (reload_completed
2172 && register_operand (operands[4], <MODE>mode)
2173 && !rtx_equal_p (operands[0], operands[4]))
2174 {
2175 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2176 operands[4], operands[1]));
2177 operands[4] = operands[2] = operands[0];
2178 }
2179 else
2180 FAIL;
2181 }
2182 [(set_attr "movprfx" "yes")]
2183)
2184
915d28fe
RS
2185;; -------------------------------------------------------------------------
2186;; ---- [INT] Highpart multiplication
2187;; -------------------------------------------------------------------------
2188;; Includes:
2189;; - SMULH
2190;; - UMULH
2191;; -------------------------------------------------------------------------
43cacb12 2192
11e9443f
RS
2193;; Unpredicated highpart multiplication.
;; Wraps the MUL_HIGHPART unspec on operands 1 and 2 in UNSPEC_PRED_X,
;; with the predicate (operand 3, via match_dup) set by the preparation
;; code to aarch64_ptrue_reg (<VPRED>mode).
2194(define_expand "<su>mul<mode>3_highpart"
2195 [(set (match_operand:SVE_I 0 "register_operand")
2196 (unspec:SVE_I
2197 [(match_dup 3)
2198 (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
2199 (match_operand:SVE_I 2 "register_operand")]
2200 MUL_HIGHPART)]
06308276 2201 UNSPEC_PRED_X))]
11e9443f
RS
2202 "TARGET_SVE"
2203 {
16de3637 2204 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
11e9443f
RS
2205 }
2206)
2207
2208;; Predicated highpart multiplication.
;; Operand 1 is the predicate of the UNSPEC_PRED_X wrapper.  The first
;; alternative ties operand 2 to the output; the second copies operand 2
;; to the output with MOVPRFX before the <su>mulh instruction.
2209(define_insn "*<su>mul<mode>3_highpart"
a08acce8 2210 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
11e9443f 2211 (unspec:SVE_I
a08acce8
RH
2212 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2213 (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
2214 (match_operand:SVE_I 3 "register_operand" "w, w")]
11e9443f 2215 MUL_HIGHPART)]
06308276 2216 UNSPEC_PRED_X))]
11e9443f 2217 "TARGET_SVE"
a08acce8
RH
2218 "@
2219 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2220 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2221 [(set_attr "movprfx" "*,yes")]
11e9443f
RS
2222)
2223
915d28fe
RS
2224;; -------------------------------------------------------------------------
2225;; ---- [INT] Division
2226;; -------------------------------------------------------------------------
2227;; Includes:
2228;; - SDIV
2229;; - SDIVR
2230;; - UDIV
2231;; - UDIVR
2232;; -------------------------------------------------------------------------
2233
2234;; Unpredicated integer division.
c38f7319
RS
;; Wraps the SVE_INT_BINARY_SD operation on operands 1 and 2 in
;; UNSPEC_PRED_X, with the predicate (operand 3, via match_dup) set by
;; the preparation code to aarch64_ptrue_reg (<VPRED>mode).  Only the
;; SVE_SDI modes are covered.
2235(define_expand "<optab><mode>3"
2236 [(set (match_operand:SVE_SDI 0 "register_operand")
2237 (unspec:SVE_SDI
2238 [(match_dup 3)
2239 (SVE_INT_BINARY_SD:SVE_SDI
2240 (match_operand:SVE_SDI 1 "register_operand")
2241 (match_operand:SVE_SDI 2 "register_operand"))]
06308276 2242 UNSPEC_PRED_X))]
c38f7319
RS
2243 "TARGET_SVE"
2244 {
16de3637 2245 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
c38f7319
RS
2246 }
2247)
2248
915d28fe 2249;; Integer division predicated with a PTRUE.
;; Alternatives, in order:
;;   1. operand 2 tied to the output: <sve_int_op> with operand 3.
;;   2. operand 3 tied to the output: the "r"-suffixed form of the
;;      mnemonic with operand 2 as the other source.
;;   3. neither tied: MOVPRFX from operand 2, then as alternative 1.
c38f7319 2250(define_insn "*<optab><mode>3"
a08acce8 2251 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
c38f7319 2252 (unspec:SVE_SDI
a08acce8 2253 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
c38f7319 2254 (SVE_INT_BINARY_SD:SVE_SDI
a08acce8 2255 (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
f8c22a8b 2256 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w"))]
06308276 2257 UNSPEC_PRED_X))]
c38f7319
RS
2258 "TARGET_SVE"
2259 "@
2260 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
a08acce8
RH
2261 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2262 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2263 [(set_attr "movprfx" "*,*,yes")]
c38f7319
RS
2264)
2265
915d28fe
RS
2266;; Predicated integer division with merging.
;; This expander has no preparation code: it emits the UNSPEC_SEL
;; pattern as written, in which operand 1 selects between the
;; SVE_INT_BINARY_SD operation on operands 2 and 3 and the fallback
;; value in operand 4 (a register or zero).
2267(define_expand "cond_<optab><mode>"
2268 [(set (match_operand:SVE_SDI 0 "register_operand")
2269 (unspec:SVE_SDI
2270 [(match_operand:<VPRED> 1 "register_operand")
2271 (SVE_INT_BINARY_SD:SVE_SDI
2272 (match_operand:SVE_SDI 2 "register_operand")
2273 (match_operand:SVE_SDI 3 "register_operand"))
2274 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
2275 UNSPEC_SEL))]
43cacb12 2276 "TARGET_SVE"
43cacb12
RS
2277)
2278
915d28fe
RS
2279;; Predicated integer division, merging with the first input.
;; The fallback value is operand 2 itself (match_dup 2).  When operand 2
;; is tied to the output a single %1/m instruction is enough; otherwise
;; operand 2 is first copied to the output with MOVPRFX.
2280(define_insn "*cond_<optab><mode>_2"
2281 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
2282 (unspec:SVE_SDI
2283 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2284 (SVE_INT_BINARY_SD:SVE_SDI
2285 (match_operand:SVE_SDI 2 "register_operand" "0, w")
2286 (match_operand:SVE_SDI 3 "register_operand" "w, w"))
2287 (match_dup 2)]
2288 UNSPEC_SEL))]
43cacb12 2289 "TARGET_SVE"
915d28fe
RS
2290 "@
2291 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2292 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2293 [(set_attr "movprfx" "*,yes")]
2294)
2295
2296;; Predicated integer division, merging with the second input.
;; The fallback value is operand 3 (match_dup 3), so the output is tied
;; to (or prefixed from) operand 3 and the <sve_int_op_rev> mnemonic is
;; used with operand 2 as the other source.
2297(define_insn "*cond_<optab><mode>_3"
2298 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
2299 (unspec:SVE_SDI
2300 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2301 (SVE_INT_BINARY_SD:SVE_SDI
2302 (match_operand:SVE_SDI 2 "register_operand" "w, w")
2303 (match_operand:SVE_SDI 3 "register_operand" "0, w"))
2304 (match_dup 3)]
2305 UNSPEC_SEL))]
2306 "TARGET_SVE"
2307 "@
2308 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2309 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2310 [(set_attr "movprfx" "*,yes")]
2311)
2312
2313;; Predicated integer division, merging with an independent value.
;; Alternatives, in order:
;;   1. operand 2 tied to the output, fallback zero: %1/z MOVPRFX of the
;;      output onto itself, then the operation with operand 3.
;;   2. operand 3 tied to the output, fallback zero: as above, but using
;;      <sve_int_op_rev> with operand 2 as the other source.
;;   3. neither input tied, fallback zero: %1/z MOVPRFX from operand 2.
;;   4. fallback (operand 4) tied to the output: %1/m MOVPRFX from
;;      operand 2.
;;   5. fallback in an unrelated register: "#".
;; The rewrite step applies after reload when operand 4 is a register
;; different from operand 0.  It emits
;; gen_vcond_mask_<mode><vpred> (operands[0], operands[2], operands[4],
;; operands[1]) and then makes operands 2 and 4 both refer to operand 0.
2314(define_insn_and_rewrite "*cond_<optab><mode>_any"
2315 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
2316 (unspec:SVE_SDI
2317 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2318 (SVE_INT_BINARY_SD:SVE_SDI
2319 (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
2320 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
2321 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
2322 UNSPEC_SEL))]
2323 "TARGET_SVE
2324 && !rtx_equal_p (operands[2], operands[4])
2325 && !rtx_equal_p (operands[3], operands[4])"
2326 "@
2327 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2328 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2329 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2330 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2331 #"
2332 "&& reload_completed
2333 && register_operand (operands[4], <MODE>mode)
2334 && !rtx_equal_p (operands[0], operands[4])"
2335 {
2336 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2337 operands[4], operands[1]));
2338 operands[4] = operands[2] = operands[0];
2339 }
2340 [(set_attr "movprfx" "yes")]
43cacb12
RS
2341)
2342
915d28fe
RS
2343;; -------------------------------------------------------------------------
2344;; ---- [INT] Binary logical operations
2345;; -------------------------------------------------------------------------
2346;; Includes:
2347;; - AND
2348;; - EOR
2349;; - ORR
2350;; -------------------------------------------------------------------------
2351
2352;; Unpredicated integer binary logical operations.
;; Alternatives, in order:
;;   1. operand 1 tied, operand 2 a "vsl" immediate: immediate form
;;      printed with %C2.
;;   2. operand 1 untied, operand 2 a "vsl" immediate: MOVPRFX from
;;      operand 1, then the immediate form.
;;   3. operand 2 a register: three-register form, which the template
;;      always prints with the .d suffix regardless of <mode>.
43cacb12 2353(define_insn "<optab><mode>3"
5e176a61 2354 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w")
43cacb12 2355 (LOGICAL:SVE_I
5e176a61
RS
2356 (match_operand:SVE_I 1 "register_operand" "%0, w, w")
2357 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
43cacb12
RS
2358 "TARGET_SVE"
2359 "@
2360 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
5e176a61 2361 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
43cacb12 2362 <logical>\t%0.d, %1.d, %2.d"
5e176a61 2363 [(set_attr "movprfx" "*,yes,*")]
43cacb12
RS
2364)
2365
915d28fe
RS
2366;; Merging forms are handled through SVE_INT_BINARY.
2367
2368;; -------------------------------------------------------------------------
2369;; ---- [INT] Binary logical operations (inverted second input)
2370;; -------------------------------------------------------------------------
2371;; Includes:
2372;; - BIC
2373;; -------------------------------------------------------------------------
43cacb12 2374
;; Matches operand 1 AND (NOT operand 2), where the NOT is wrapped in an
;; UNSPEC_PRED_X with predicate operand 3.  The emitted BIC is the
;; three-register form, always printed with the .d suffix, and does not
;; use operand 3.  The rewrite step replaces a non-constant operand 3
;; with CONSTM1_RTX (<VPRED>mode).
35d6c591 2375(define_insn_and_rewrite "*bic<mode>3"
43cacb12
RS
2376 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2377 (and:SVE_I
35d6c591
RS
2378 (unspec:SVE_I
2379 [(match_operand 3)
2380 (not:SVE_I (match_operand:SVE_I 2 "register_operand" "w"))]
06308276 2381 UNSPEC_PRED_X)
35d6c591
RS
2382 (match_operand:SVE_I 1 "register_operand" "w")))]
2383 "TARGET_SVE"
2384 "bic\t%0.d, %1.d, %2.d"
2385 "&& !CONSTANT_P (operands[3])"
2386 {
2387 operands[3] = CONSTM1_RTX (<VPRED>mode);
2388 }
43cacb12
RS
2389)
2390
1b187f36
RS
2391;; Predicated integer BIC, merging with the first input.
;; Operand 1 selects between (operand 2 AND NOT operand 3) and operand 2
;; itself (match_dup 2).  Note that the inverted input is operand 3 and
;; the non-inverted one is operand 2.  The second alternative copies an
;; untied operand 2 to the output with MOVPRFX first.
2392(define_insn "*cond_bic<mode>_2"
2393 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2394 (unspec:SVE_I
2395 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2396 (and:SVE_I
2397 (not:SVE_I (match_operand:SVE_I 3 "register_operand" "w, w"))
2398 (match_operand:SVE_I 2 "register_operand" "0, w"))
2399 (match_dup 2)]
2400 UNSPEC_SEL))]
2401 "TARGET_SVE"
2402 "@
2403 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2404 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2405 [(set_attr "movprfx" "*,yes")]
2406)
2407
2408;; Predicated integer BIC, merging with an independent value.
;; Alternatives, in order:
;;   1. operand 2 tied to the output, fallback zero: %1/z MOVPRFX of the
;;      output onto itself.
;;   2. operand 2 untied, fallback zero: %1/z MOVPRFX from operand 2.
;;   3. fallback (operand 4) tied to the output: %1/m MOVPRFX from
;;      operand 2.
;;   4. fallback in an unrelated register: "#".
;; There is no alternative that ties operand 3 (the inverted input) to
;; the output.  The rewrite step applies after reload when operand 4 is
;; a register different from operand 0.  It emits
;; gen_vcond_mask_<mode><vpred> (operands[0], operands[2], operands[4],
;; operands[1]) and then makes operands 2 and 4 both refer to operand 0.
2409(define_insn_and_rewrite "*cond_bic<mode>_any"
2410 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w")
2411 (unspec:SVE_I
2412 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2413 (and:SVE_I
2414 (not:SVE_I (match_operand:SVE_I 3 "register_operand" "w, w, w, w"))
2415 (match_operand:SVE_I 2 "register_operand" "0, w, w, w"))
2416 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
2417 UNSPEC_SEL))]
2418 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
2419 "@
2420 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2421 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2422 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2423 #"
2424 "&& reload_completed
2425 && register_operand (operands[4], <MODE>mode)
2426 && !rtx_equal_p (operands[0], operands[4])"
2427 {
2428 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2429 operands[4], operands[1]));
2430 operands[4] = operands[2] = operands[0];
2431 }
2432 [(set_attr "movprfx" "yes")]
2433)
2434
915d28fe
RS
2435;; -------------------------------------------------------------------------
2436;; ---- [INT] Shifts
2437;; -------------------------------------------------------------------------
2438;; Includes:
2439;; - ASR
2440;; - LSL
2441;; - LSR
2442;; -------------------------------------------------------------------------
43cacb12 2443
915d28fe
RS
;; Unpredicated shift by a scalar, which expands into one of the vector
;; shifts below.
;;
;; Operand 2 is the scalar shift amount in the element mode (<VEL>).  It is
;; broadcast to a full vector before being handed to v<optab><mode>3:
;;  - a CONST_INT becomes a constant vector, which is kept as a constant
;;    only if the vector shift pattern accepts it as an immediate, and is
;;    otherwise forced into a register;
;;  - anything else is converted to <VEL>mode and duplicated into a fresh
;;    vector register.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
                      (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
        /* Test the broadcast vector, not the scalar: AMOUNT is what is
           passed as operand 2 of v<optab><mode>3, whose predicate is
           aarch64_sve_<lr>shift_operand in <MODE>mode.  */
        if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
          amount = force_reg (<MODE>mode, amount);
      }
    else
      {
        amount = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_vec_duplicate<mode> (amount,
                                            convert_to_mode (<VEL>mode,
                                                             operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)
2470
915d28fe 2471;; Unpredicated shift by a vector.
43cacb12
RS
2472(define_expand "v<optab><mode>3"
2473 [(set (match_operand:SVE_I 0 "register_operand")
2474 (unspec:SVE_I
2475 [(match_dup 3)
2476 (ASHIFT:SVE_I
2477 (match_operand:SVE_I 1 "register_operand")
2478 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
06308276 2479 UNSPEC_PRED_X))]
43cacb12
RS
2480 "TARGET_SVE"
2481 {
16de3637 2482 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
2483 }
2484)
2485
915d28fe
RS
2486;; Shift by a vector, predicated with a PTRUE. We don't actually need
2487;; the predicate for the first alternative, but using Upa or X isn't
2488;; likely to gain much and would make the instruction seem less uniform
2489;; to the register allocator.
;; Alternatives, in order:
;;  1. operand 3 matches the D<lr> constraint: emitted as "#" and split
;;     after reload into an unpredicated ASHIFT set (see the split
;;     condition, which fires when operand 3 is not a register);
;;  2. destination tied to the shifted value (operand 2);
;;  3. destination tied to the shift amount (operand 3), using the
;;     "<shift>r" mnemonic with the two inputs printed in swapped order;
;;  4. untied destination, with a MOVPRFX of operand 2 first.
26004f51 2490(define_insn_and_split "*v<optab><mode>3"
7d1f2401 2491 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
43cacb12 2492 (unspec:SVE_I
7d1f2401 2493 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
43cacb12 2494 (ASHIFT:SVE_I
7d1f2401
RS
2495 (match_operand:SVE_I 2 "register_operand" "w, 0, w, w")
2496 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
06308276 2497 UNSPEC_PRED_X))]
43cacb12
RS
2498 "TARGET_SVE"
2499 "@
26004f51 2500 #
a08acce8 2501 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
7d1f2401 2502 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
a08acce8 2503 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
26004f51
RS
2504 "&& reload_completed
2505 && !register_operand (operands[3], <MODE>mode)"
2506 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
2507 ""
7d1f2401 2508 [(set_attr "movprfx" "*,*,*,yes")]
43cacb12
RS
2509)
2510
26004f51
RS
2511;; Unpredicated shift operations by a constant (post-RA only).
2512;; These are generated by splitting a predicated instruction whose
2513;; predicate is unused.
2514(define_insn "*post_ra_v<optab><mode>3"
2515 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2516 (ASHIFT:SVE_I
2517 (match_operand:SVE_I 1 "register_operand" "w")
2518 (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
2519 "TARGET_SVE && reload_completed"
2520 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
2521)
2522
20103c0e
RS
2523;; Predicated integer shift, merging with the first input.
2524(define_insn "*cond_<optab><mode>_2_const"
2525 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2526 (unspec:SVE_I
2527 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2528 (ASHIFT:SVE_I
2529 (match_operand:SVE_I 2 "register_operand" "0, w")
2530 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
2531 (match_dup 2)]
2532 UNSPEC_SEL))]
2533 "TARGET_SVE"
2534 "@
2535 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2536 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
2537 [(set_attr "movprfx" "*,yes")]
2538)
2539
2540;; Predicated integer shift, merging with an independent value.
2541(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
2542 [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
2543 (unspec:SVE_I
2544 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2545 (ASHIFT:SVE_I
2546 (match_operand:SVE_I 2 "register_operand" "w, w, w")
2547 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
2548 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
2549 UNSPEC_SEL))]
2550 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
2551 "@
2552 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2553 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2554 #"
2555 "&& reload_completed
2556 && register_operand (operands[4], <MODE>mode)
2557 && !rtx_equal_p (operands[0], operands[4])"
2558 {
2559 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2560 operands[4], operands[1]));
2561 operands[4] = operands[2] = operands[0];
2562 }
2563 [(set_attr "movprfx" "yes")]
2564)
2565
915d28fe
RS
2566;; -------------------------------------------------------------------------
2567;; ---- [FP] General binary arithmetic corresponding to rtx codes
2568;; -------------------------------------------------------------------------
2569;; Includes post-RA forms of:
2570;; - FADD
2571;; - FMUL
2572;; - FSUB
2573;; -------------------------------------------------------------------------
43cacb12 2574
915d28fe
RS
2575;; Unpredicated floating-point binary operations (post-RA only).
2576;; These are generated by splitting a predicated instruction whose
2577;; predicate is unused.
2578(define_insn "*post_ra_<sve_fp_op><mode>3"
2579 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2580 (SVE_UNPRED_FP_BINARY:SVE_F
2581 (match_operand:SVE_F 1 "register_operand" "w")
2582 (match_operand:SVE_F 2 "register_operand" "w")))]
2583 "TARGET_SVE && reload_completed"
2584 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
2585
2586;; -------------------------------------------------------------------------
2587;; ---- [FP] General binary arithmetic corresponding to unspecs
2588;; -------------------------------------------------------------------------
2589;; Includes merging forms of:
a19ba9e1 2590;; - FADD (constant forms handled in the "Addition" section)
915d28fe
RS
2591;; - FDIV
2592;; - FDIVR
a19ba9e1
RS
2593;; - FMAXNM (including #0.0 and #1.0)
2594;; - FMINNM (including #0.0 and #1.0)
2595;; - FMUL (including #0.5 and #2.0)
2596;; - FSUB (constant forms handled in the "Addition" section)
2597;; - FSUBR (constant forms handled in the "Subtraction" section)
915d28fe
RS
2598;; -------------------------------------------------------------------------
2599
0254ed79
RS
2600;; Unpredicated floating-point binary operations.
2601(define_expand "<optab><mode>3"
2602 [(set (match_operand:SVE_F 0 "register_operand")
2603 (unspec:SVE_F
2604 [(match_dup 3)
2605 (const_int SVE_RELAXED_GP)
2606 (match_operand:SVE_F 1 "<sve_pred_fp_rhs1_operand>")
2607 (match_operand:SVE_F 2 "<sve_pred_fp_rhs2_operand>")]
2608 SVE_COND_FP_BINARY))]
2609 "TARGET_SVE"
2610 {
2611 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2612 }
2613)
2614
2615;; Predicated floating-point binary operations that have no immediate forms.
2616(define_insn "*<optab><mode>3"
2617 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2618 (unspec:SVE_F
2619 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2620 (match_operand:SI 4 "aarch64_sve_gp_strictness")
2621 (match_operand:SVE_F 2 "register_operand" "0, w, w")
2622 (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
2623 SVE_COND_FP_BINARY_REG))]
2624 "TARGET_SVE"
2625 "@
2626 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2627 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2628 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2629 [(set_attr "movprfx" "*,*,yes")]
2630)
2631
915d28fe
RS
2632;; Predicated floating-point operations with merging.
2633(define_expand "cond_<optab><mode>"
2634 [(set (match_operand:SVE_F 0 "register_operand")
2635 (unspec:SVE_F
2636 [(match_operand:<VPRED> 1 "register_operand")
2637 (unspec:SVE_F
6fe679cc 2638 [(match_dup 1)
c9c5a809 2639 (const_int SVE_STRICT_GP)
a19ba9e1
RS
2640 (match_operand:SVE_F 2 "<sve_pred_fp_rhs1_operand>")
2641 (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_operand>")]
915d28fe
RS
2642 SVE_COND_FP_BINARY)
2643 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
2644 UNSPEC_SEL))]
43cacb12 2645 "TARGET_SVE"
43cacb12
RS
2646)
2647
915d28fe 2648;; Predicated floating-point operations, merging with the first input.
;; The UNSPEC_SEL falls back to operand 2 (match_dup 2), so the result can
;; be computed in place: alternative 1 ties the destination to operand 2,
;; alternative 2 copies operand 2 into the destination with MOVPRFX first.
;; Operand 4 is the predicate of the inner operation.  It is matched
;; without a mode or constraint, must pass aarch64_sve_pred_dominates_p
;; against the selecting predicate (operand 1), and is never printed.
;; When it differs from operand 1, the rewrite replaces it with a copy of
;; operand 1.
c9c5a809 2649(define_insn_and_rewrite "*cond_<optab><mode>_2"
915d28fe
RS
2650 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2651 (unspec:SVE_F
57d6f4d0 2652 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe 2653 (unspec:SVE_F
c9c5a809
RS
2654 [(match_operand 4)
2655 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6fe679cc 2656 (match_operand:SVE_F 2 "register_operand" "0, w")
915d28fe
RS
2657 (match_operand:SVE_F 3 "register_operand" "w, w")]
2658 SVE_COND_FP_BINARY)
2659 (match_dup 2)]
2660 UNSPEC_SEL))]
c9c5a809 2661 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
43cacb12 2662 "@
915d28fe
RS
2663 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2664 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
c9c5a809
RS
2665 "&& !rtx_equal_p (operands[1], operands[4])"
2666 {
2667 operands[4] = copy_rtx (operands[1]);
2668 }
915d28fe 2669 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
2670)
2671
a19ba9e1
RS
2672;; Same for operations that take a 1-bit constant.
2673(define_insn_and_rewrite "*cond_<optab><mode>_2_const"
2674 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?w")
2675 (unspec:SVE_F
2676 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2677 (unspec:SVE_F
2678 [(match_operand 4)
2679 (match_operand:SI 5 "aarch64_sve_gp_strictness")
2680 (match_operand:SVE_F 2 "register_operand" "0, w")
2681 (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
2682 SVE_COND_FP_BINARY_I1)
2683 (match_dup 2)]
2684 UNSPEC_SEL))]
2685 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
2686 "@
2687 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2688 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
2689 "&& !rtx_equal_p (operands[1], operands[4])"
2690 {
2691 operands[4] = copy_rtx (operands[1]);
2692 }
2693 [(set_attr "movprfx" "*,yes")]
2694)
2695
915d28fe 2696;; Predicated floating-point operations, merging with the second input.
c9c5a809 2697(define_insn_and_rewrite "*cond_<optab><mode>_3"
915d28fe
RS
2698 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2699 (unspec:SVE_F
57d6f4d0 2700 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe 2701 (unspec:SVE_F
c9c5a809
RS
2702 [(match_operand 4)
2703 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6fe679cc 2704 (match_operand:SVE_F 2 "register_operand" "w, w")
915d28fe
RS
2705 (match_operand:SVE_F 3 "register_operand" "0, w")]
2706 SVE_COND_FP_BINARY)
2707 (match_dup 3)]
2708 UNSPEC_SEL))]
c9c5a809 2709 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
43cacb12 2710 "@
915d28fe
RS
2711 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2712 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
c9c5a809
RS
2713 "&& !rtx_equal_p (operands[1], operands[4])"
2714 {
2715 operands[4] = copy_rtx (operands[1]);
2716 }
915d28fe 2717 [(set_attr "movprfx" "*,yes")]
cee99fa0
RS
2718)
2719
915d28fe
RS
2720;; Predicated floating-point operations, merging with an independent value.
;; Operand 4 is the fallback value of the UNSPEC_SEL: zero (Dz) in the
;; first three alternatives, a register tied to the destination in the
;; fourth, and an untied register in the fifth (emitted as "#").  The insn
;; condition rejects operand 4 equal to either operand 2 or operand 3.
;; Operand 5 is the predicate of the inner operation; it is matched
;; without a mode or constraint and must pass aarch64_sve_pred_dominates_p
;; against operand 1.
;; The rewrite is always attempted ("&& 1") and does one of three things:
;;  - after reload, when operand 4 is a register other than operand 0:
;;    emit a vcond_mask built from (operand 0, operand 2, operand 4,
;;    operand 1), then redirect operands 2 and 4 to operand 0;
;;  - otherwise, when operand 5 differs from operand 1: replace operand 5
;;    with a copy of operand 1;
;;  - otherwise FAIL, leaving the insn unchanged.
2721(define_insn_and_rewrite "*cond_<optab><mode>_any"
2722 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
2723 (unspec:SVE_F
2724 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2725 (unspec:SVE_F
c9c5a809
RS
2726 [(match_operand 5)
2727 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6fe679cc 2728 (match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
915d28fe
RS
2729 (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
2730 SVE_COND_FP_BINARY)
2731 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
2732 UNSPEC_SEL))]
2733 "TARGET_SVE
2734 && !rtx_equal_p (operands[2], operands[4])
c9c5a809
RS
2735 && !rtx_equal_p (operands[3], operands[4])
2736 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
cee99fa0 2737 "@
915d28fe
RS
2738 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2739 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2740 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2741 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2742 #"
c9c5a809 2743 "&& 1"
915d28fe 2744 {
c9c5a809
RS
2745 if (reload_completed
2746 && register_operand (operands[4], <MODE>mode)
2747 && !rtx_equal_p (operands[0], operands[4]))
2748 {
2749 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2750 operands[4], operands[1]));
2751 operands[4] = operands[2] = operands[0];
2752 }
2753 else if (!rtx_equal_p (operands[1], operands[5]))
2754 operands[5] = copy_rtx (operands[1]);
2755 else
2756 FAIL;
915d28fe
RS
2757 }
2758 [(set_attr "movprfx" "yes")]
cee99fa0
RS
2759)
2760
a19ba9e1
RS
2761;; Same for operations that take a 1-bit constant.
2762(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
2763 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?w")
2764 (unspec:SVE_F
2765 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2766 (unspec:SVE_F
2767 [(match_operand 5)
2768 (match_operand:SI 6 "aarch64_sve_gp_strictness")
2769 (match_operand:SVE_F 2 "register_operand" "w, w, w")
2770 (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
2771 SVE_COND_FP_BINARY_I1)
2772 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
2773 UNSPEC_SEL))]
2774 "TARGET_SVE
2775 && !rtx_equal_p (operands[2], operands[4])
2776 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
2777 "@
2778 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2779 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2780 #"
2781 "&& 1"
2782 {
2783 if (reload_completed
2784 && register_operand (operands[4], <MODE>mode)
2785 && !rtx_equal_p (operands[0], operands[4]))
2786 {
2787 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2788 operands[4], operands[1]));
2789 operands[4] = operands[2] = operands[0];
2790 }
2791 else if (!rtx_equal_p (operands[1], operands[5]))
2792 operands[5] = copy_rtx (operands[1]);
2793 else
2794 FAIL;
2795 }
2796 [(set_attr "movprfx" "yes")]
2797)
2798
915d28fe
RS
2799;; -------------------------------------------------------------------------
2800;; ---- [FP] Addition
2801;; -------------------------------------------------------------------------
2802;; Includes:
2803;; - FADD
2804;; - FSUB
2805;; -------------------------------------------------------------------------
43cacb12 2806
c9c5a809 2807;; Predicated floating-point addition.
;; Alternatives, in order:
;;  1. operand 3 matches vsA, destination tied to operand 2: FADD with
;;     the constant printed as an immediate;
;;  2. operand 3 matches vsN, destination tied to operand 2: FSUB with
;;     the constant printed through %N3 (presumably the negated value --
;;     confirm against the %N operand modifier);
;;  3. register + register: emitted as "#" and split after reload into a
;;     plain PLUS set, which drops the predicate;
;;  4. as 1, but with operand 2 untied and copied in by MOVPRFX;
;;  5. as 2, but with operand 2 untied and copied in by MOVPRFX.
915d28fe 2808(define_insn_and_split "*add<mode>3"
5e176a61 2809 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
915d28fe 2810 (unspec:SVE_F
5e176a61
RS
2811 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2812 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, i, i")
2813 (match_operand:SVE_F 2 "register_operand" "%0, 0, w, w, w")
2814 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, vsA, vsN")]
6fe679cc 2815 UNSPEC_COND_FADD))]
cee99fa0 2816 "TARGET_SVE"
915d28fe
RS
2817 "@
2818 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2819 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5e176a61
RS
2820 #
2821 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2822 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
915d28fe
RS
2823 ; Split the unpredicated form after reload, so that we don't have
2824 ; the unnecessary PTRUE.
2825 "&& reload_completed
2826 && register_operand (operands[3], <MODE>mode)"
2827 [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
5e176a61
RS
2828 ""
2829 [(set_attr "movprfx" "*,*,*,yes,yes")]
cee99fa0
RS
2830)
2831
a19ba9e1
RS
2832;; Predicated floating-point addition of a constant, merging with the
2833;; first input.
2834(define_insn_and_rewrite "*cond_add<mode>_2_const"
2835 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?w, ?w")
2836 (unspec:SVE_F
2837 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2838 (unspec:SVE_F
2839 [(match_operand 4)
2840 (match_operand:SI 5 "aarch64_sve_gp_strictness")
2841 (match_operand:SVE_F 2 "register_operand" "0, 0, w, w")
2842 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
2843 UNSPEC_COND_FADD)
2844 (match_dup 2)]
2845 UNSPEC_SEL))]
2846 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
2847 "@
2848 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2849 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2850 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2851 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
2852 "&& !rtx_equal_p (operands[1], operands[4])"
2853 {
2854 operands[4] = copy_rtx (operands[1]);
2855 }
2856 [(set_attr "movprfx" "*,*,yes,yes")]
2857)
2858
2859;; Predicated floating-point addition of a constant, merging with an
2860;; independent value.
2861(define_insn_and_rewrite "*cond_add<mode>_any_const"
2862 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
2863 (unspec:SVE_F
2864 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2865 (unspec:SVE_F
2866 [(match_operand 5)
2867 (match_operand:SI 6 "aarch64_sve_gp_strictness")
2868 (match_operand:SVE_F 2 "register_operand" "w, w, w, w, w, w")
2869 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
2870 UNSPEC_COND_FADD)
2871 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
2872 UNSPEC_SEL))]
2873 "TARGET_SVE
2874 && !rtx_equal_p (operands[2], operands[4])
2875 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
2876 "@
2877 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2878 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2879 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2880 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2881 #
2882 #"
2883 "&& 1"
2884 {
2885 if (reload_completed
2886 && register_operand (operands[4], <MODE>mode)
2887 && !rtx_equal_p (operands[0], operands[4]))
2888 {
2889 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2890 operands[4], operands[1]));
2891 operands[4] = operands[2] = operands[0];
2892 }
2893 else if (!rtx_equal_p (operands[1], operands[5]))
2894 operands[5] = copy_rtx (operands[1]);
2895 else
2896 FAIL;
2897 }
2898 [(set_attr "movprfx" "yes")]
2899)
2900
2901;; Register merging forms are handled through SVE_COND_FP_BINARY.
cee99fa0 2902
915d28fe
RS
2903;; -------------------------------------------------------------------------
2904;; ---- [FP] Subtraction
2905;; -------------------------------------------------------------------------
2906;; Includes:
915d28fe
RS
2907;; - FSUB
2908;; - FSUBR
2909;; -------------------------------------------------------------------------
cee99fa0 2910
;; Predicated floating-point subtraction (operand 2 minus operand 3).
;;
;; Alternatives, in order:
;;  1. operand 2 is a vsA constant and the destination is tied to
;;     operand 3: FSUBR with the constant printed as an immediate;
;;  2. register minus register: emitted as "#" and split after reload
;;     into a plain MINUS set, which drops the predicate;
;;  3. as 1, but with operand 3 in a different register from the
;;     destination, copied in by MOVPRFX first.  Operand 3 must therefore
;;     be an untied "w" here; tying it to operand 0 would make the MOVPRFX
;;     a self-copy and leave the alternative a duplicate of alternative 1.
(define_insn_and_split "*sub<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, i")
           (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "vsA, w, vsA")
           (match_operand:SVE_F 3 "register_operand" "0, w, w")]
          UNSPEC_COND_FSUB))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
2933
a19ba9e1
RS
2934;; Predicated floating-point subtraction from a constant, merging with the
2935;; second input.
2936(define_insn_and_rewrite "*cond_sub<mode>_3_const"
2937 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?w")
2938 (unspec:SVE_F
2939 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2940 (unspec:SVE_F
2941 [(match_operand 4)
2942 (match_operand:SI 5 "aarch64_sve_gp_strictness")
2943 (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
2944 (match_operand:SVE_F 3 "register_operand" "0, w")]
2945 UNSPEC_COND_FSUB)
2946 (match_dup 3)]
2947 UNSPEC_SEL))]
2948 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
2949 "@
2950 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2951 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
2952 "&& !rtx_equal_p (operands[1], operands[4])"
2953 {
2954 operands[4] = copy_rtx (operands[1]);
2955 }
2956 [(set_attr "movprfx" "*,yes")]
2957)
2958
2959;; Predicated floating-point subtraction from a constant, merging with an
2960;; independent value.
2961(define_insn_and_rewrite "*cond_sub<mode>_any_const"
2962 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?w")
2963 (unspec:SVE_F
2964 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2965 (unspec:SVE_F
2966 [(match_operand 5)
2967 (match_operand:SI 6 "aarch64_sve_gp_strictness")
2968 (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
2969 (match_operand:SVE_F 3 "register_operand" "w, w, w")]
2970 UNSPEC_COND_FSUB)
2971 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
2972 UNSPEC_SEL))]
2973 "TARGET_SVE
2974 && !rtx_equal_p (operands[3], operands[4])
2975 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
2976 "@
2977 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2978 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2979 #"
2980 "&& 1"
2981 {
2982 if (reload_completed
2983 && register_operand (operands[4], <MODE>mode)
2984 && !rtx_equal_p (operands[0], operands[4]))
2985 {
2986 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
2987 operands[4], operands[1]));
2988 operands[4] = operands[3] = operands[0];
2989 }
2990 else if (!rtx_equal_p (operands[1], operands[5]))
2991 operands[5] = copy_rtx (operands[1]);
2992 else
2993 FAIL;
2994 }
2995 [(set_attr "movprfx" "yes")]
2996)
2997
2998;; Register merging forms are handled through SVE_COND_FP_BINARY.
43cacb12 2999
915d28fe
RS
3000;; -------------------------------------------------------------------------
3001;; ---- [FP] Absolute difference
3002;; -------------------------------------------------------------------------
3003;; Includes:
3004;; - FABD
3005;; -------------------------------------------------------------------------
3006
3007;; Predicated floating-point absolute difference.
c9c5a809 3008(define_insn_and_rewrite "*fabd<mode>3"
5e176a61 3009 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
915d28fe 3010 (unspec:SVE_F
5e176a61 3011 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
c9c5a809 3012 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6fe679cc 3013 (unspec:SVE_F
c9c5a809
RS
3014 [(match_operand 5)
3015 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5e176a61
RS
3016 (match_operand:SVE_F 2 "register_operand" "%0, w")
3017 (match_operand:SVE_F 3 "register_operand" "w, w")]
6fe679cc
RS
3018 UNSPEC_COND_FSUB)]
3019 UNSPEC_COND_FABS))]
c9c5a809 3020 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5e176a61
RS
3021 "@
3022 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3023 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
c9c5a809
RS
3024 "&& !rtx_equal_p (operands[1], operands[5])"
3025 {
3026 operands[5] = copy_rtx (operands[1]);
3027 }
5e176a61 3028 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
3029)
3030
bf30864e
RS
3031;; Predicated floating-point absolute difference, merging with the first
3032;; input.
3033(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
3034 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
3035 (unspec:SVE_F
3036 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3037 (unspec:SVE_F
3038 [(match_operand 4)
3039 (match_operand:SI 5 "aarch64_sve_gp_strictness")
3040 (unspec:SVE_F
3041 [(match_operand 6)
3042 (match_operand:SI 7 "aarch64_sve_gp_strictness")
3043 (match_operand:SVE_F 2 "register_operand" "0, w")
3044 (match_operand:SVE_F 3 "register_operand" "w, w")]
3045 UNSPEC_COND_FSUB)]
3046 UNSPEC_COND_FABS)
3047 (match_dup 2)]
3048 UNSPEC_SEL))]
3049 "TARGET_SVE
3050 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
3051 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
3052 "@
3053 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3054 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3055 "&& (!rtx_equal_p (operands[1], operands[4])
3056 || !rtx_equal_p (operands[1], operands[6]))"
3057 {
3058 operands[4] = copy_rtx (operands[1]);
3059 operands[6] = copy_rtx (operands[1]);
3060 }
3061 [(set_attr "movprfx" "*,yes")]
3062)
3063
3064;; Predicated floating-point absolute difference, merging with the second
3065;; input.
3066(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
3067 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
3068 (unspec:SVE_F
3069 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3070 (unspec:SVE_F
3071 [(match_operand 4)
3072 (match_operand:SI 5 "aarch64_sve_gp_strictness")
3073 (unspec:SVE_F
3074 [(match_operand 6)
3075 (match_operand:SI 7 "aarch64_sve_gp_strictness")
3076 (match_operand:SVE_F 2 "register_operand" "w, w")
3077 (match_operand:SVE_F 3 "register_operand" "0, w")]
3078 UNSPEC_COND_FSUB)]
3079 UNSPEC_COND_FABS)
3080 (match_dup 3)]
3081 UNSPEC_SEL))]
3082 "TARGET_SVE
3083 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
3084 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
3085 "@
3086 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3087 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3088 "&& (!rtx_equal_p (operands[1], operands[4])
3089 || !rtx_equal_p (operands[1], operands[6]))"
3090 {
3091 operands[4] = copy_rtx (operands[1]);
3092 operands[6] = copy_rtx (operands[1]);
3093 }
3094 [(set_attr "movprfx" "*,yes")]
3095)
3096
3097;; Predicated floating-point absolute difference, merging with an
3098;; independent value.
;; Operand 4 is the fallback value of the UNSPEC_SEL: zero (Dz) in the
;; first three alternatives, a register tied to the destination in the
;; fourth, and an untied register in the fifth (emitted as "#").  The insn
;; condition rejects operand 4 equal to either operand 2 or operand 3.
;; Operands 5 and 7 are the predicates of the FABS and FSUB unspecs; both
;; are matched without a mode or constraint and must pass
;; aarch64_sve_pred_dominates_p against operand 1.
;; The rewrite is always attempted ("&& 1") and does one of three things:
;;  - after reload, when operand 4 is a register other than operand 0:
;;    emit a vcond_mask built from (operand 0, operand 3, operand 4,
;;    operand 1), then redirect operands 3 and 4 to operand 0.  Note that
;;    this uses operand 3, where the other "_any" patterns in this file
;;    use operand 2;
;;  - otherwise, when operand 5 or operand 7 differs from operand 1:
;;    replace both with copies of operand 1;
;;  - otherwise FAIL, leaving the insn unchanged.
3099(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
3100 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3101 (unspec:SVE_F
3102 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3103 (unspec:SVE_F
3104 [(match_operand 5)
3105 (match_operand:SI 6 "aarch64_sve_gp_strictness")
3106 (unspec:SVE_F
3107 [(match_operand 7)
3108 (match_operand:SI 8 "aarch64_sve_gp_strictness")
3109 (match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
3110 (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
3111 UNSPEC_COND_FSUB)]
3112 UNSPEC_COND_FABS)
3113 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3114 UNSPEC_SEL))]
3115 "TARGET_SVE
3116 && !rtx_equal_p (operands[2], operands[4])
3117 && !rtx_equal_p (operands[3], operands[4])
3118 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
3119 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
3120 "@
3121 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3122 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3123 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3124 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3125 #"
3126 "&& 1"
3127 {
3128 if (reload_completed
3129 && register_operand (operands[4], <MODE>mode)
3130 && !rtx_equal_p (operands[0], operands[4]))
3131 {
3132 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
3133 operands[4], operands[1]));
3134 operands[4] = operands[3] = operands[0];
3135 }
3136 else if (!rtx_equal_p (operands[1], operands[5])
3137 || !rtx_equal_p (operands[1], operands[7]))
3138 {
3139 operands[5] = copy_rtx (operands[1]);
3140 operands[7] = copy_rtx (operands[1]);
3141 }
3142 else
3143 FAIL;
3144 }
3145 [(set_attr "movprfx" "yes")]
3146)
3147
915d28fe
RS
3148;; -------------------------------------------------------------------------
3149;; ---- [FP] Multiplication
3150;; -------------------------------------------------------------------------
3151;; Includes:
3152;; - FMUL
3153;; -------------------------------------------------------------------------
3154
;; Predicated floating-point multiplication.
;;
;; Alternatives, in order:
;;  1. operand 3 is a vsM constant and the destination is tied to
;;     operand 2: FMUL with the constant printed as an immediate;
;;  2. register * register: emitted as "#" and split after reload into a
;;     plain MULT set, which drops the predicate;
;;  3. as 1, but with operand 2 in a different register from the
;;     destination, copied in by MOVPRFX first.  Operand 2 must therefore
;;     be an untied "w" here; tying it to operand 0 would make the MOVPRFX
;;     a self-copy and leave the alternative a duplicate of alternative 1.
(define_insn_and_split "*mul<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, i")
           (match_operand:SVE_F 2 "register_operand" "%0, w, w")
           (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w, vsM")]
          UNSPEC_COND_FMUL))]
  "TARGET_SVE"
  "@
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #
   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)
3177
a19ba9e1
RS
3178;; Merging forms are handled through SVE_COND_FP_BINARY and
3179;; SVE_COND_FP_BINARY_I1.
915d28fe 3180
915d28fe
RS
3181;; -------------------------------------------------------------------------
3182;; ---- [FP] Binary logical operations
3183;; -------------------------------------------------------------------------
3184;; Includes:
3185;; - AND
3186;; - EOR
3187;; - ORR
3188;; -------------------------------------------------------------------------
3189
3190;; Binary logical operations on floating-point modes. We avoid subregs
3191;; by providing this, but we need to use UNSPECs since rtx logical ops
3192;; aren't defined for floating-point modes.
;; Operand 0 receives the LOGICALF operation applied to operands 1 and 2.
;; The template always prints the registers with a .d suffix, whatever the
;; element size of <mode>.
3193(define_insn "*<optab><mode>3"
3194 [(set (match_operand:SVE_F 0 "register_operand" "=w")
3195 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
3196 (match_operand:SVE_F 2 "register_operand" "w")]
3197 LOGICALF))]
43cacb12 3198 "TARGET_SVE"
915d28fe 3199 "<logicalf_op>\t%0.d, %1.d, %2.d"
43cacb12
RS
3200)
3201
915d28fe
RS
3202;; -------------------------------------------------------------------------
3203;; ---- [FP] Sign copying
3204;; -------------------------------------------------------------------------
3205;; The patterns in this section are synthetic.
3206;; -------------------------------------------------------------------------
3207
;; Expand copysign on the integer view (<V_INT_EQUIV>) of the operands:
;;   sign    = operand 2 & (all-ones << bits)     -- top bit of each element
;;   mant    = operand 1 & ~(all-ones << bits)    -- every other bit
;;   int_res = sign | mant
;; where bits is the element width minus one.  int_res is then moved into
;; operand 0 through a lowpart view in <MODE>.
3208(define_expand "copysign<mode>3"
3209 [(match_operand:SVE_F 0 "register_operand")
3210 (match_operand:SVE_F 1 "register_operand")
3211 (match_operand:SVE_F 2 "register_operand")]
43cacb12
RS
3212 "TARGET_SVE"
3213 {
915d28fe
RS
3214 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3215 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
3216 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3217 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
43cacb12 3218
915d28fe
RS
3219 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3220 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3221
3222 emit_insn (gen_and<v_int_equiv>3
3223 (sign, arg2,
3224 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3225 HOST_WIDE_INT_M1U
3226 << bits)));
3227 emit_insn (gen_and<v_int_equiv>3
3228 (mant, arg1,
3229 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3230 ~(HOST_WIDE_INT_M1U
3231 << bits))));
3232 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
3233 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3234 DONE;
43cacb12
RS
3235 }
3236)
3237
915d28fe
RS
;; Expand xorsign on the integer view (<V_INT_EQUIV>) of the operands:
;;   sign    = operand 2 & (all-ones << bits)     -- top bit of each element
;;   int_res = operand 1 ^ sign
;; where bits is the element width minus one.  int_res is then moved into
;; operand 0 through a lowpart view in <MODE>.
3238(define_expand "xorsign<mode>3"
3239 [(match_operand:SVE_F 0 "register_operand")
3240 (match_operand:SVE_F 1 "register_operand")
3241 (match_operand:SVE_F 2 "register_operand")]
43cacb12
RS
3242 "TARGET_SVE"
3243 {
915d28fe
RS
3244 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3245 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3246 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
3247
3248 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3249 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3250
3251 emit_insn (gen_and<v_int_equiv>3
3252 (sign, arg2,
3253 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3254 HOST_WIDE_INT_M1U
3255 << bits)));
3256 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
3257 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3258 DONE;
43cacb12
RS
3259 }
3260)
3261
915d28fe
RS
3262;; -------------------------------------------------------------------------
3263;; ---- [FP] Maximum and minimum
3264;; -------------------------------------------------------------------------
3265;; Includes:
915d28fe 3266;; - FMAXNM
915d28fe
RS
3267;; - FMINNM
3268;; -------------------------------------------------------------------------
43cacb12 3269
0254ed79
RS
3270;; Unpredicated fmax/fmin (the libm functions). The optabs for the
3271;; smin/smax rtx codes are handled in the generic section above.
43cacb12
RS
;; Callers supply operands 0-2 only.  Operand 3, the governing predicate,
;; is filled in by the preparation statement with the register returned by
;; aarch64_ptrue_reg for <VPRED>mode, and the operation is tagged with
;; SVE_RELAXED_GP.
3272(define_expand "<maxmin_uns><mode>3"
3273 [(set (match_operand:SVE_F 0 "register_operand")
3274 (unspec:SVE_F
3275 [(match_dup 3)
c9c5a809 3276 (const_int SVE_RELAXED_GP)
214c42fa 3277 (match_operand:SVE_F 1 "register_operand")
75079ddf 3278 (match_operand:SVE_F 2 "aarch64_sve_float_maxmin_operand")]
214c42fa 3279 SVE_COND_FP_MAXMIN_PUBLIC))]
43cacb12
RS
3280 "TARGET_SVE"
3281 {
16de3637 3282 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
3283 }
3284)
3285
214c42fa
RS
3286;; Predicated floating-point maximum/minimum.
;; Operand 1 is the governing predicate and operand 4 the GP strictness
;; flag.  The four alternatives are:
;;   1. operand 2 tied to the destination, operand 3 an immediate;
;;   2. operand 2 tied to the destination, operand 3 a register;
;;   3. as 1, but operand 2 is first copied into the destination by MOVPRFX;
;;   4. as 2, but operand 2 is first copied into the destination by MOVPRFX.
3287(define_insn "*<optab><mode>3"
75079ddf 3288 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w, ?&w")
43cacb12 3289 (unspec:SVE_F
75079ddf 3290 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
c9c5a809 3291 (match_operand:SI 4 "aarch64_sve_gp_strictness")
75079ddf
RS
3292 (match_operand:SVE_F 2 "register_operand" "%0, 0, w, w")
3293 (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
214c42fa 3294 SVE_COND_FP_MAXMIN_PUBLIC))]
43cacb12 3295 "TARGET_SVE"
a08acce8 3296 "@
75079ddf 3297 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
214c42fa 3298 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
75079ddf 3299 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
214c42fa 3300 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
75079ddf 3301 [(set_attr "movprfx" "*,*,yes,yes")]
43cacb12
RS
3302)
3303
a19ba9e1
RS
3304;; Merging forms are handled through SVE_COND_FP_BINARY and
3305;; SVE_COND_FP_BINARY_I1.
915d28fe
RS
3306
3307;; -------------------------------------------------------------------------
3308;; ---- [PRED] Binary logical operations
3309;; -------------------------------------------------------------------------
3310;; Includes:
3311;; - AND
3312;; - ANDS
3313;; - EOR
3314;; - EORS
3315;; - ORR
3316;; - ORRS
3317;; -------------------------------------------------------------------------
3318
3319;; Predicate AND. We can reuse one of the inputs as the GP.
;; The RTL is a plain two-input AND with no separate predicate operand.
;; The template prints operand 1 twice: once as the zeroing governing
;; predicate (%1/z) and once as the first source.
3320(define_insn "and<mode>3"
3321 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3322 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
3323 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
9d4ac06e 3324 "TARGET_SVE"
915d28fe 3325 "and\t%0.b, %1/z, %1.b, %2.b"
a08acce8 3326)
9d4ac06e 3327
915d28fe
RS
3328;; Unpredicated predicate EOR and ORR.
;; The LOGICAL_OR result of operands 1 and 2 is ANDed with operand 3.
;; Callers do not supply operand 3: the preparation statement sets it to
;; the register returned by aarch64_ptrue_reg for <MODE>mode.
3329(define_expand "<optab><mode>3"
3330 [(set (match_operand:PRED_ALL 0 "register_operand")
3331 (and:PRED_ALL
3332 (LOGICAL_OR:PRED_ALL
3333 (match_operand:PRED_ALL 1 "register_operand")
3334 (match_operand:PRED_ALL 2 "register_operand"))
3335 (match_dup 3)))]
6c4fd4a9 3336 "TARGET_SVE"
915d28fe
RS
3337 {
3338 operands[3] = aarch64_ptrue_reg (<MODE>mode);
3339 }
a08acce8 3340)
6c4fd4a9 3341
915d28fe 3342;; Predicated predicate AND, EOR and ORR.
;; Operands 2 and 3 are the inputs to the LOGICAL operation.  Operand 1 is
;; ANDed with the result in the RTL and is printed as the zeroing governing
;; predicate (%1/z) in the template.
34467289 3343(define_insn "@aarch64_pred_<optab><mode>_z"
915d28fe
RS
3344 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3345 (and:PRED_ALL
3346 (LOGICAL:PRED_ALL
3347 (match_operand:PRED_ALL 2 "register_operand" "Upa")
3348 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
3349 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3350 "TARGET_SVE"
3351 "<logical>\t%0.b, %1/z, %2.b, %3.b"
3352)
3353
3354;; Perform a logical operation on operands 2 and 3, using operand 1 as
34467289
RS
3355;; the GP. Store the result in operand 0 and set the flags in the same
3356;; way as for PTEST.
915d28fe
RS
;; This is a two-set pattern.  The first set writes CC_REGNUM from an
;; UNSPEC_PTEST of the predicated logical result; the second set stores
;; that same result in operand 0.
;; Operand 1 is the VNx16BI register printed as the governing predicate.
;; Operand 4 has no mode or predicate of its own; it is the value ANDed
;; with the logical result in both sets (through match_dup).
;; Operand 5 must satisfy aarch64_sve_ptrue_flag.
;; The template appends "s" to the mnemonic to select the flag-setting form.
3357(define_insn "*<optab><mode>3_cc"
3358 [(set (reg:CC_NZC CC_REGNUM)
3359 (unspec:CC_NZC
34467289
RS
3360 [(match_operand:VNx16BI 1 "register_operand" "Upa")
3361 (match_operand 4)
3362 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe
RS
3363 (and:PRED_ALL
3364 (LOGICAL:PRED_ALL
3365 (match_operand:PRED_ALL 2 "register_operand" "Upa")
3366 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
34467289
RS
3367 (match_dup 4))]
3368 UNSPEC_PTEST))
915d28fe
RS
3369 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3370 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
34467289 3371 (match_dup 4)))]
915d28fe
RS
3372 "TARGET_SVE"
3373 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
3374)
3375
3376;; -------------------------------------------------------------------------
3377;; ---- [PRED] Binary logical operations (inverted second input)
3378;; -------------------------------------------------------------------------
3379;; Includes:
3380;; - BIC
3381;; - ORN
3382;; -------------------------------------------------------------------------
3383
3384;; Predicated predicate BIC and ORN.
;; Operand 3 is the input that is inverted (wrapped in NOT) and comes first
;; inside the NLOGICAL rtx, but it is printed last in the template.
;; Operand 2 is the non-inverted input and operand 1 the zeroing governing
;; predicate.
3385(define_insn "*<nlogical><mode>3"
3386 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3387 (and:PRED_ALL
3388 (NLOGICAL:PRED_ALL
35d6c591
RS
3389 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
3390 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
915d28fe
RS
3391 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3392 "TARGET_SVE"
35d6c591 3393 "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
915d28fe
RS
3394)
3395
3396;; -------------------------------------------------------------------------
3397;; ---- [PRED] Binary logical operations (inverted result)
3398;; -------------------------------------------------------------------------
3399;; Includes:
3400;; - NAND
3401;; - NOR
3402;; -------------------------------------------------------------------------
3403
3404;; Predicated predicate NAND and NOR.
;; Both inputs (operands 2 and 3) are wrapped in NOT inside the NLOGICAL
;; rtx, and the result is ANDed with operand 1, which the template prints
;; as the zeroing governing predicate.  The mnemonic comes from
;; <logical_nn>.
3405(define_insn "*<logical_nn><mode>3"
3406 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3407 (and:PRED_ALL
3408 (NLOGICAL:PRED_ALL
3409 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3410 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
3411 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3412 "TARGET_SVE"
3413 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
3414)
3415
3416;; =========================================================================
3417;; == Ternary arithmetic
3418;; =========================================================================
3419
3420;; -------------------------------------------------------------------------
3421;; ---- [INT] MLA and MAD
3422;; -------------------------------------------------------------------------
3423;; Includes:
3424;; - MAD
3425;; - MLA
3426;; -------------------------------------------------------------------------
3427
b6c3aea1
RS
3428;; Unpredicated integer addition of product.
;; Computes operand 0 = operand 1 * operand 2 + operand 3.  Operand 2 only
;; has to be a nonmemory_operand at this stage.
;; aarch64_prepare_sve_int_fma is tried first; if it returns true the
;; expansion is already complete.  Otherwise operand 4, the predicate of
;; the UNSPEC_PRED_X multiplication, is set to the register returned by
;; aarch64_ptrue_reg and the pattern below is emitted.
3429(define_expand "fma<mode>4"
3430 [(set (match_operand:SVE_I 0 "register_operand")
3431 (plus:SVE_I
3432 (unspec:SVE_I
3433 [(match_dup 4)
3434 (mult:SVE_I (match_operand:SVE_I 1 "register_operand")
3435 (match_operand:SVE_I 2 "nonmemory_operand"))]
3436 UNSPEC_PRED_X)
3437 (match_operand:SVE_I 3 "register_operand")))]
3438 "TARGET_SVE"
3439 {
3440 if (aarch64_prepare_sve_int_fma (operands, PLUS))
3441 DONE;
3442 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
3443 }
3444)
3445
915d28fe 3446;; Predicated integer addition of product.
;; Operand 1 is the predicate, operands 2 and 3 the multiplicands and
;; operand 4 the addend.  The alternatives are:
;;   1. a multiplicand (operand 2) tied to the destination: MAD;
;;   2. the addend (operand 4) tied to the destination: MLA;
;;   3. nothing tied: MOVPRFX copies the addend into the destination, then
;;      MLA.
b6c3aea1 3447(define_insn "*fma<mode>4"
915d28fe
RS
3448 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
3449 (plus:SVE_I
3450 (unspec:SVE_I
3451 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3452 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
3453 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
06308276 3454 UNSPEC_PRED_X)
915d28fe 3455 (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
a08acce8
RH
3456 "TARGET_SVE"
3457 "@
915d28fe
RS
3458 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3459 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3460 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
3461 [(set_attr "movprfx" "*,*,yes")]
a08acce8
RH
3462)
3463
b6c3aea1
RS
3464;; Predicated integer addition of product with merging.
;; UNSPEC_SEL chooses, under predicate operand 1, between
;; operand 2 * operand 3 + operand 4 and the fallback value operand 5.
;; Operand 3 is only required to be a general_operand here.
;; aarch64_prepare_sve_cond_int_fma is tried first and, if it returns true,
;; the expansion is already complete.
3465(define_expand "cond_fma<mode>"
3466 [(set (match_operand:SVE_I 0 "register_operand")
3467 (unspec:SVE_I
3468 [(match_operand:<VPRED> 1 "register_operand")
3469 (plus:SVE_I
3470 (mult:SVE_I (match_operand:SVE_I 2 "register_operand")
3471 (match_operand:SVE_I 3 "general_operand"))
3472 (match_operand:SVE_I 4 "register_operand"))
3473 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
3474 UNSPEC_SEL))]
3475 "TARGET_SVE"
3476 {
3477 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
3478 DONE;
3479 /* Swap the multiplication operands if the fallback value is the
3480 second of the two. */
3481 if (rtx_equal_p (operands[3], operands[5]))
3482 std::swap (operands[2], operands[3]);
3483 }
3484)
3485
3486;; Predicated integer addition of product, merging with the first input.
;; The fallback value is operand 2 itself (match_dup 2), so MAD, which
;; overwrites a multiplicand, is used.
;;   1. operand 2 tied to the destination: MAD directly;
;;   2. otherwise MOVPRFX copies operand 2 into the destination first.
3487(define_insn "*cond_fma<mode>_2"
3488 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3489 (unspec:SVE_I
3490 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3491 (plus:SVE_I
3492 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "0, w")
3493 (match_operand:SVE_I 3 "register_operand" "w, w"))
3494 (match_operand:SVE_I 4 "register_operand" "w, w"))
3495 (match_dup 2)]
3496 UNSPEC_SEL))]
3497 "TARGET_SVE"
3498 "@
3499 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3500 movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
3501 [(set_attr "movprfx" "*,yes")]
3502)
3503
3504;; Predicated integer addition of product, merging with the third input.
;; The fallback value is the addend, operand 4 (match_dup 4), so MLA, which
;; overwrites the addend, is used.
;;   1. operand 4 tied to the destination: MLA directly;
;;   2. otherwise MOVPRFX copies operand 4 into the destination first.
3505(define_insn "*cond_fma<mode>_4"
3506 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3507 (unspec:SVE_I
3508 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3509 (plus:SVE_I
3510 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w")
3511 (match_operand:SVE_I 3 "register_operand" "w, w"))
3512 (match_operand:SVE_I 4 "register_operand" "0, w"))
3513 (match_dup 4)]
3514 UNSPEC_SEL))]
3515 "TARGET_SVE"
3516 "@
3517 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3518 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
3519 [(set_attr "movprfx" "*,yes")]
3520)
3521
3522;; Predicated integer addition of product, merging with an independent value.
;; The insn condition requires the fallback (operand 5) to differ from
;; operands 2, 3 and 4.  The alternatives are:
;;   1-4. operand 5 uses "Dz" and the destination is set up by a zeroing
;;        (%1/z) MOVPRFX: from operand 4 in alternative 1, and in place in
;;        alternatives 2-4, where operand 4, 2 or 3 respectively is tied to
;;        the destination.  MLA is used when the addend ends up in the
;;        destination and MAD when a multiplicand does;
;;   5.   operand 5 tied to the destination: a merging (%1/m) MOVPRFX from
;;        operand 4 followed by MLA;
;;   6.   operand 5 in an unrelated register: "#".  After reload this is
;;        rewritten by emitting vcond_mask to select between operands 4 and
;;        5 under operand 1 into operand 0, and then using operand 0 as
;;        both addend and fallback.
3523(define_insn_and_rewrite "*cond_fma<mode>_any"
3524 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
3525 (unspec:SVE_I
3526 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
3527 (plus:SVE_I
3528 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
3529 (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))
3530 (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w"))
3531 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
3532 UNSPEC_SEL))]
3533 "TARGET_SVE
3534 && !rtx_equal_p (operands[2], operands[5])
3535 && !rtx_equal_p (operands[3], operands[5])
3536 && !rtx_equal_p (operands[4], operands[5])"
3537 "@
3538 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3539 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3540 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3541 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
3542 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3543 #"
3544 "&& reload_completed
3545 && register_operand (operands[5], <MODE>mode)
3546 && !rtx_equal_p (operands[0], operands[5])"
3547 {
3548 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
3549 operands[5], operands[1]));
3550 operands[5] = operands[4] = operands[0];
3551 }
3552 [(set_attr "movprfx" "yes")]
3553)
3554
915d28fe
RS
3555;; -------------------------------------------------------------------------
3556;; ---- [INT] MLS and MSB
3557;; -------------------------------------------------------------------------
3558;; Includes:
3559;; - MLS
3560;; - MSB
3561;; -------------------------------------------------------------------------
3562
b6c3aea1
RS
3563;; Unpredicated integer subtraction of product.
;; Computes operand 0 = operand 3 - operand 1 * operand 2.  Operand 2 only
;; has to be a general_operand at this stage.
;; aarch64_prepare_sve_int_fma is tried first; if it returns true the
;; expansion is already complete.  Otherwise operand 4, the predicate of
;; the UNSPEC_PRED_X multiplication, is set to the register returned by
;; aarch64_ptrue_reg and the pattern below is emitted.
3564(define_expand "fnma<mode>4"
3565 [(set (match_operand:SVE_I 0 "register_operand")
3566 (minus:SVE_I
3567 (match_operand:SVE_I 3 "register_operand")
3568 (unspec:SVE_I
3569 [(match_dup 4)
3570 (mult:SVE_I (match_operand:SVE_I 1 "register_operand")
3571 (match_operand:SVE_I 2 "general_operand"))]
3572 UNSPEC_PRED_X)))]
3573 "TARGET_SVE"
3574 {
3575 if (aarch64_prepare_sve_int_fma (operands, MINUS))
3576 DONE;
3577 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
3578 }
3579)
3580
915d28fe 3581;; Predicated integer subtraction of product.
;; Operand 1 is the predicate, operands 2 and 3 the multiplicands and
;; operand 4 the minuend.  The alternatives are:
;;   1. a multiplicand (operand 2) tied to the destination: MSB;
;;   2. the minuend (operand 4) tied to the destination: MLS;
;;   3. nothing tied: MOVPRFX copies the minuend into the destination, then
;;      MLS.
b6c3aea1 3582(define_insn "*fnma<mode>3"
915d28fe
RS
3583 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
3584 (minus:SVE_I
3585 (match_operand:SVE_I 4 "register_operand" "w, 0, w")
3586 (unspec:SVE_I
3587 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3588 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
3589 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
06308276 3590 UNSPEC_PRED_X)))]
915d28fe
RS
3591 "TARGET_SVE"
3592 "@
3593 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3594 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3595 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
3596 [(set_attr "movprfx" "*,*,yes")]
3597)
3598
b6c3aea1
RS
3599;; Predicated integer subtraction of product with merging.
;; UNSPEC_SEL chooses, under predicate operand 1, between
;; operand 4 - operand 2 * operand 3 and the fallback value operand 5.
;; Operand 3 is only required to be a general_operand here.
;; aarch64_prepare_sve_cond_int_fma is tried first and, if it returns true,
;; the expansion is already complete.
3600(define_expand "cond_fnma<mode>"
3601 [(set (match_operand:SVE_I 0 "register_operand")
3602 (unspec:SVE_I
3603 [(match_operand:<VPRED> 1 "register_operand")
3604 (minus:SVE_I
3605 (match_operand:SVE_I 4 "register_operand")
3606 (mult:SVE_I (match_operand:SVE_I 2 "register_operand")
3607 (match_operand:SVE_I 3 "general_operand")))
3608 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
3609 UNSPEC_SEL))]
3610 "TARGET_SVE"
3611 {
3612 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
3613 DONE;
3614 /* Swap the multiplication operands if the fallback value is the
3615 second of the two. */
3616 if (rtx_equal_p (operands[3], operands[5]))
3617 std::swap (operands[2], operands[3]);
3618 }
3619)
3620
3621;; Predicated integer subtraction of product, merging with the first input.
;; The fallback value is operand 2 itself (match_dup 2), so MSB, which
;; overwrites a multiplicand, is used.
;;   1. operand 2 tied to the destination: MSB directly;
;;   2. otherwise MOVPRFX copies operand 2 into the destination first.
3622(define_insn "*cond_fnma<mode>_2"
3623 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3624 (unspec:SVE_I
3625 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3626 (minus:SVE_I
3627 (match_operand:SVE_I 4 "register_operand" "w, w")
3628 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "0, w")
3629 (match_operand:SVE_I 3 "register_operand" "w, w")))
3630 (match_dup 2)]
3631 UNSPEC_SEL))]
3632 "TARGET_SVE"
3633 "@
3634 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3635 movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
3636 [(set_attr "movprfx" "*,yes")]
3637)
3638
3639;; Predicated integer subtraction of product, merging with the third input.
;; The fallback value is the minuend, operand 4 (match_dup 4), so MLS,
;; which overwrites the minuend, is used.
;;   1. operand 4 tied to the destination: MLS directly;
;;   2. otherwise MOVPRFX copies operand 4 into the destination first.
3640(define_insn "*cond_fnma<mode>_4"
3641 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3642 (unspec:SVE_I
3643 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3644 (minus:SVE_I
3645 (match_operand:SVE_I 4 "register_operand" "0, w")
3646 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w")
3647 (match_operand:SVE_I 3 "register_operand" "w, w")))
3648 (match_dup 4)]
3649 UNSPEC_SEL))]
3650 "TARGET_SVE"
3651 "@
3652 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3653 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
3654 [(set_attr "movprfx" "*,yes")]
3655)
3656
3657;; Predicated integer subtraction of product, merging with an
3658;; independent value.
;; The insn condition requires the fallback (operand 5) to differ from
;; operands 2, 3 and 4.  The alternatives are:
;;   1-4. operand 5 uses "Dz" and the destination is set up by a zeroing
;;        (%1/z) MOVPRFX: from operand 4 in alternative 1, and in place in
;;        alternatives 2-4, where operand 4, 2 or 3 respectively is tied to
;;        the destination.  MLS is used when the minuend ends up in the
;;        destination and MSB when a multiplicand does;
;;   5.   operand 5 tied to the destination: a merging (%1/m) MOVPRFX from
;;        operand 4 followed by MLS;
;;   6.   operand 5 in an unrelated register: "#".  After reload this is
;;        rewritten by emitting vcond_mask to select between operands 4 and
;;        5 under operand 1 into operand 0, and then using operand 0 as
;;        both minuend and fallback.
3659(define_insn_and_rewrite "*cond_fnma<mode>_any"
3660 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
3661 (unspec:SVE_I
3662 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
3663 (minus:SVE_I
3664 (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")
3665 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
3666 (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")))
3667 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
3668 UNSPEC_SEL))]
3669 "TARGET_SVE
3670 && !rtx_equal_p (operands[2], operands[5])
3671 && !rtx_equal_p (operands[3], operands[5])
3672 && !rtx_equal_p (operands[4], operands[5])"
3673 "@
3674 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3675 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3676 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3677 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
3678 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3679 #"
3680 "&& reload_completed
3681 && register_operand (operands[5], <MODE>mode)
3682 && !rtx_equal_p (operands[0], operands[5])"
3683 {
3684 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
3685 operands[5], operands[1]));
3686 operands[5] = operands[4] = operands[0];
3687 }
3688 [(set_attr "movprfx" "yes")]
3689)
3690
915d28fe
RS
3691;; -------------------------------------------------------------------------
3692;; ---- [INT] Dot product
3693;; -------------------------------------------------------------------------
3694;; Includes:
3695;; - SDOT
3696;; - UDOT
3697;; -------------------------------------------------------------------------
3698
3699;; Four-element integer dot-product with accumulation.
;; Operands 1 and 2 are the narrow (<VSI2QI>) inputs to the DOTPROD unspec
;; and operand 3 is the accumulator added to its result.
;;   1. accumulator tied to the destination: the dot-product instruction
;;      on its own;
;;   2. otherwise MOVPRFX copies the accumulator into the destination
;;      first.
;; The inputs are printed with <Vetype_fourth> and the destination with
;; <Vetype>.
3700(define_insn "<sur>dot_prod<vsi2qi>"
a08acce8 3701 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
915d28fe
RS
3702 (plus:SVE_SDI
3703 (unspec:SVE_SDI
3704 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
3705 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
3706 DOTPROD)
3707 (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
a08acce8
RH
3708 "TARGET_SVE"
3709 "@
915d28fe
RS
3710 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
3711 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
a08acce8
RH
3712 [(set_attr "movprfx" "*,yes")]
3713)
3714
915d28fe
RS
3715;; -------------------------------------------------------------------------
3716;; ---- [INT] Sum of absolute differences
3717;; -------------------------------------------------------------------------
3718;; The patterns in this section are synthetic.
3719;; -------------------------------------------------------------------------
3720
3721;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
3722;; operands 1 and 2. The sequence also has to perform a widening reduction of
3723;; the difference into a vector and accumulate that into operand 3 before
3724;; copying that into the result operand 0.
3725;; Perform that with a sequence of:
3726;; MOV ones.b, #1
3727;; [SU]ABD diff.b, p0/m, op1.b, op2.b
3728;; MOVPRFX op0, op3 // If necessary
3729;; UDOT op0.s, diff.b, ones.b
;; The expansion is done entirely by the C block; the RTL template only
;; declares the operands.  "ones" is a register holding CONST1_RTX of the
;; narrow mode and "diff" receives the <sur>abd of operands 1 and 2.  The
;; accumulation into operand 0 always goes through gen_udot_prod, whichever
;; <sur> variant is being expanded.
3730(define_expand "<sur>sad<vsi2qi>"
3731 [(use (match_operand:SVE_SDI 0 "register_operand"))
3732 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
3733 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
3734 (use (match_operand:SVE_SDI 3 "register_operand"))]
3735 "TARGET_SVE"
3736 {
3737 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
3738 rtx diff = gen_reg_rtx (<VSI2QI>mode);
3739 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
3740 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
3741 DONE;
3742 }
3743)
3744
3745;; -------------------------------------------------------------------------
3746;; ---- [FP] General ternary arithmetic corresponding to unspecs
3747;; -------------------------------------------------------------------------
3748;; Includes merging patterns for:
3749;; - FMAD
3750;; - FMLA
3751;; - FMLS
3752;; - FMSB
3753;; - FNMAD
3754;; - FNMLA
3755;; - FNMLS
3756;; - FNMSB
3757;; -------------------------------------------------------------------------
3758
0d80d083
RS
3759;; Unpredicated floating-point ternary operations.
;; Callers supply operands 0-3 only.  Operand 4, the governing predicate,
;; is filled in by the preparation statement with the register returned by
;; aarch64_ptrue_reg for <VPRED>mode, and the operation is tagged with
;; SVE_RELAXED_GP.
3760(define_expand "<optab><mode>4"
3761 [(set (match_operand:SVE_F 0 "register_operand")
3762 (unspec:SVE_F
3763 [(match_dup 4)
c9c5a809 3764 (const_int SVE_RELAXED_GP)
0d80d083
RS
3765 (match_operand:SVE_F 1 "register_operand")
3766 (match_operand:SVE_F 2 "register_operand")
3767 (match_operand:SVE_F 3 "register_operand")]
3768 SVE_COND_FP_TERNARY))]
3769 "TARGET_SVE"
3770 {
3771 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
3772 }
3773)
3774
3775;; Predicated floating-point ternary operations.
;; Operand 1 is the predicate, operand 5 the GP strictness flag, operands 2
;; and 3 the multiplicands and operand 4 the accumulator.  The alternatives
;; are:
;;   1. accumulator (operand 4) tied to the destination: <sve_fmla_op>;
;;   2. a multiplicand (operand 2) tied to the destination: <sve_fmad_op>;
;;   3. nothing tied: MOVPRFX copies the accumulator into the destination,
;;      then <sve_fmla_op>.
3776(define_insn "*<optab><mode>4"
3777 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
3778 (unspec:SVE_F
3779 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
c9c5a809 3780 (match_operand:SI 5 "aarch64_sve_gp_strictness")
0d80d083
RS
3781 (match_operand:SVE_F 2 "register_operand" "%w, 0, w")
3782 (match_operand:SVE_F 3 "register_operand" "w, w, w")
3783 (match_operand:SVE_F 4 "register_operand" "0, w, w")]
3784 SVE_COND_FP_TERNARY))]
3785 "TARGET_SVE"
3786 "@
3787 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3788 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3789 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
3790 [(set_attr "movprfx" "*,*,yes")]
3791)
3792
915d28fe
RS
3793;; Predicated floating-point ternary operations with merging.
;; The outer UNSPEC_SEL chooses, under predicate operand 1, between the
;; result of the inner ternary operation and the fallback value operand 5.
;; The inner operation is governed by the same predicate (match_dup 1) and
;; is tagged with SVE_STRICT_GP.
3794(define_expand "cond_<optab><mode>"
3795 [(set (match_operand:SVE_F 0 "register_operand")
3796 (unspec:SVE_F
3797 [(match_operand:<VPRED> 1 "register_operand")
3798 (unspec:SVE_F
0d80d083 3799 [(match_dup 1)
c9c5a809 3800 (const_int SVE_STRICT_GP)
0d80d083 3801 (match_operand:SVE_F 2 "register_operand")
915d28fe
RS
3802 (match_operand:SVE_F 3 "register_operand")
3803 (match_operand:SVE_F 4 "register_operand")]
3804 SVE_COND_FP_TERNARY)
3805 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
3806 UNSPEC_SEL))]
3807 "TARGET_SVE"
3808{
3809 /* Swap the multiplication operands if the fallback value is the
3810 second of the two. */
3811 if (rtx_equal_p (operands[3], operands[5]))
3812 std::swap (operands[2], operands[3]);
3813})
3814
3815;; Predicated floating-point ternary operations, merging with the
3816;; first input.
;; The fallback value is operand 2 itself (match_dup 2), so <sve_fmad_op>,
;; which overwrites a multiplicand, is used; alternative 2 first copies
;; operand 2 into the destination with MOVPRFX.
;; Operand 5 is the predicate of the inner operation.  It may differ from
;; operand 1 provided aarch64_sve_pred_dominates_p accepts the pair, and
;; in that case the rewrite step replaces it with a copy of operand 1.
c9c5a809 3817(define_insn_and_rewrite "*cond_<optab><mode>_2"
915d28fe
RS
3818 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
3819 (unspec:SVE_F
a08acce8 3820 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe 3821 (unspec:SVE_F
c9c5a809
RS
3822 [(match_operand 5)
3823 (match_operand:SI 6 "aarch64_sve_gp_strictness")
0d80d083 3824 (match_operand:SVE_F 2 "register_operand" "0, w")
915d28fe
RS
3825 (match_operand:SVE_F 3 "register_operand" "w, w")
3826 (match_operand:SVE_F 4 "register_operand" "w, w")]
3827 SVE_COND_FP_TERNARY)
3828 (match_dup 2)]
a08acce8 3829 UNSPEC_SEL))]
c9c5a809 3830 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
a08acce8 3831 "@
915d28fe
RS
3832 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3833 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
c9c5a809
RS
3834 "&& !rtx_equal_p (operands[1], operands[5])"
3835 {
3836 operands[5] = copy_rtx (operands[1]);
3837 }
a08acce8
RH
3838 [(set_attr "movprfx" "*,yes")]
3839)
3840
915d28fe
RS
3841;; Predicated floating-point ternary operations, merging with the
3842;; third input.
;; The fallback value is the accumulator, operand 4 (match_dup 4), so
;; <sve_fmla_op>, which overwrites the accumulator, is used; alternative 2
;; first copies operand 4 into the destination with MOVPRFX.
;; Operand 5 is the predicate of the inner operation.  It may differ from
;; operand 1 provided aarch64_sve_pred_dominates_p accepts the pair, and
;; in that case the rewrite step replaces it with a copy of operand 1.
c9c5a809 3843(define_insn_and_rewrite "*cond_<optab><mode>_4"
915d28fe
RS
3844 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
3845 (unspec:SVE_F
a08acce8 3846 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe 3847 (unspec:SVE_F
c9c5a809
RS
3848 [(match_operand 5)
3849 (match_operand:SI 6 "aarch64_sve_gp_strictness")
0d80d083 3850 (match_operand:SVE_F 2 "register_operand" "w, w")
915d28fe
RS
3851 (match_operand:SVE_F 3 "register_operand" "w, w")
3852 (match_operand:SVE_F 4 "register_operand" "0, w")]
3853 SVE_COND_FP_TERNARY)
3854 (match_dup 4)]
a08acce8 3855 UNSPEC_SEL))]
c9c5a809 3856 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
a08acce8 3857 "@
915d28fe
RS
3858 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3859 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
c9c5a809
RS
3860 "&& !rtx_equal_p (operands[1], operands[5])"
3861 {
3862 operands[5] = copy_rtx (operands[1]);
3863 }
a08acce8
RH
3864 [(set_attr "movprfx" "*,yes")]
3865)
3866
915d28fe
RS
3867;; Predicated floating-point ternary operations, merging with an
3868;; independent value.
;; The insn condition requires the fallback (operand 5) to differ from
;; operands 2, 3 and 4, and requires aarch64_sve_pred_dominates_p to accept
;; the inner predicate (operand 6) against operand 1.  The alternatives are:
;;   1-4. operand 5 uses "Dz" and the destination is set up by a zeroing
;;        (%1/z) MOVPRFX: from operand 4 in alternative 1, and in place in
;;        alternatives 2-4, where operand 4, 2 or 3 respectively is tied to
;;        the destination;
;;   5.   operand 5 tied to the destination: a merging (%1/m) MOVPRFX from
;;        operand 4 followed by <sve_fmla_op>;
;;   6.   operand 5 in an unrelated register: "#".
;; The rewrite condition is unconditional ("&& 1") and the C block decides
;; what to do:
;;   - after reload, with operand 5 a register other than operand 0, emit
;;     vcond_mask to select between operands 4 and 5 under operand 1 into
;;     operand 0, then use operand 0 as both accumulator and fallback;
;;   - otherwise, if operand 6 differs from operand 1, replace operand 6
;;     with a copy of operand 1;
;;   - otherwise FAIL, leaving the insn as it is.
f4fde1b3 3869(define_insn_and_rewrite "*cond_<optab><mode>_any"
432b29c1 3870 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
915d28fe 3871 (unspec:SVE_F
432b29c1 3872 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
915d28fe 3873 (unspec:SVE_F
c9c5a809
RS
3874 [(match_operand 6)
3875 (match_operand:SI 7 "aarch64_sve_gp_strictness")
432b29c1
RS
3876 (match_operand:SVE_F 2 "register_operand" "w, w, 0, w, w, w")
3877 (match_operand:SVE_F 3 "register_operand" "w, w, w, 0, w, w")
3878 (match_operand:SVE_F 4 "register_operand" "w, 0, w, w, w, w")]
915d28fe 3879 SVE_COND_FP_TERNARY)
432b29c1 3880 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
0d2b3bca 3881 UNSPEC_SEL))]
f4fde1b3 3882 "TARGET_SVE
915d28fe
RS
3883 && !rtx_equal_p (operands[2], operands[5])
3884 && !rtx_equal_p (operands[3], operands[5])
c9c5a809
RS
3885 && !rtx_equal_p (operands[4], operands[5])
3886 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
32cf949c 3887 "@
915d28fe 3888 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
432b29c1
RS
3889 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3890 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3891 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
915d28fe 3892 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
32cf949c 3893 #"
c9c5a809 3894 "&& 1"
f4fde1b3 3895 {
c9c5a809
RS
3896 if (reload_completed
3897 && register_operand (operands[5], <MODE>mode)
3898 && !rtx_equal_p (operands[0], operands[5]))
3899 {
3900 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
3901 operands[5], operands[1]));
3902 operands[5] = operands[4] = operands[0];
3903 }
3904 else if (!rtx_equal_p (operands[1], operands[6]))
3905 operands[6] = copy_rtx (operands[1]);
3906 else
3907 FAIL;
f4fde1b3 3908 }
32cf949c 3909 [(set_attr "movprfx" "yes")]
0d2b3bca
RS
3910)
3911
915d28fe
RS
3912;; =========================================================================
3913;; == Comparisons and selects
3914;; =========================================================================
3915
3916;; -------------------------------------------------------------------------
3917;; ---- [INT,FP] Select based on predicates
3918;; -------------------------------------------------------------------------
3919;; Includes merging patterns for:
d29f7dd5 3920;; - FMOV
915d28fe
RS
3921;; - MOV
3922;; - SEL
3923;; -------------------------------------------------------------------------
3924
3925;; vcond_mask operand order: true, false, mask
3926;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
3927;; SEL operand order: mask, true, false
d29f7dd5
RS
;; Operand 3 is the mask, operand 1 the value chosen where the mask is
;; true and operand 2 the value chosen where it is false.  Operand 1 may be
;; a register or a duplicated immediate and operand 2 a register or zero.
;; When operand 1 is a register, operand 2 is forced into a register too,
;; which is what the condition of the matching insn requires.
3928(define_expand "vcond_mask_<mode><vpred>"
3929 [(set (match_operand:SVE_ALL 0 "register_operand")
915d28fe 3930 (unspec:SVE_ALL
d29f7dd5
RS
3931 [(match_operand:<VPRED> 3 "register_operand")
3932 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
3933 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
915d28fe
RS
3934 UNSPEC_SEL))]
3935 "TARGET_SVE"
d29f7dd5
RS
3936 {
3937 if (register_operand (operands[1], <MODE>mode))
3938 operands[2] = force_reg (<MODE>mode, operands[2]);
3939 }
915d28fe
RS
3940)
3941
d29f7dd5
RS
3942;; Selects between:
3943;; - two registers
3944;; - a duplicated immediate and a register
3945;; - a duplicated immediate and zero
;; The insn condition rejects a register operand 1 paired with a
;; non-register operand 2.  The alternatives, in order, are:
;;   1. register / register: SEL;
;;   2. "vss" immediate, operand 2 tied to the destination: MOV with /m;
;;   3. "vss" immediate, operand 2 "Dz": MOV with /z;
;;   4. "Ufc" immediate, operand 2 tied to the destination: FMOV with /m;
;;   5. "Ufc" immediate, operand 2 "Dz": zeroing MOVPRFX, then FMOV with /m;
;;   6. "vss" immediate, operand 2 in another register: MOVPRFX from
;;      operand 2, then MOV with /m;
;;   7. "Ufc" immediate, operand 2 in another register: MOVPRFX from
;;      operand 2, then FMOV with /m.
3946(define_insn "*vcond_mask_<mode><vpred>"
3947 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
3948 (unspec:SVE_ALL
3949 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
3950 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
3951 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
915d28fe 3952 UNSPEC_SEL))]
d29f7dd5
RS
3953 "TARGET_SVE
3954 && (!register_operand (operands[1], <MODE>mode)
3955 || register_operand (operands[2], <MODE>mode))"
3956 "@
3957 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
3958 mov\t%0.<Vetype>, %3/m, #%I1
3959 mov\t%0.<Vetype>, %3/z, #%I1
3960 fmov\t%0.<Vetype>, %3/m, #%1
3961 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
3962 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
3963 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
3964 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
43cacb12
RS
3965)
3966
88a37c4d
RS
3967;; Optimize selects between a duplicated scalar variable and another vector,
3968;; the latter of which can be a zero constant or a variable. Treat duplicates
3969;; of GPRs as being more expensive than duplicates of FPRs, since they
3970;; involve a cross-file move.
;; The alternatives come in GPR/FPR pairs (operand 1 "r" then "w"), one
;; pair for each form of operand 2: tied to the output (plain merging MOV),
;; zero (MOVPRFX /z first) and a separate register (MOVPRFX copy first).
;; GPR sources are printed with %<vwcore>1 and FPR sources with
;; %<Vetype>1; each GPR alternative carries one more "?" than its FPR
;; counterpart.
3971(define_insn "*aarch64_sel_dup<mode>"
3972 [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
3973 (unspec:SVE_ALL
3974 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl")
3975 (vec_duplicate:SVE_ALL
3976 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
3977 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
3978 UNSPEC_SEL))]
3979 "TARGET_SVE"
3980 "@
3981 mov\t%0.<Vetype>, %3/m, %<vwcore>1
3982 mov\t%0.<Vetype>, %3/m, %<Vetype>1
3983 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
3984 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
3985 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
3986 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
3987 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
3988)
3989
915d28fe
RS
3990;; -------------------------------------------------------------------------
3991;; ---- [INT,FP] Compare and select
3992;; -------------------------------------------------------------------------
3993;; The patterns in this section are synthetic.
3994;; -------------------------------------------------------------------------
43cacb12 3995
915d28fe
RS
3996;; Integer (signed) vcond. Don't enforce an immediate range here, since it
3997;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Operand 0 is the result, operands 1 and 2 the values selected between,
;; operand 3 the comparison operator and operands 4 and 5 the values
;; compared (in mode <V_INT_EQUIV>).  Expansion is delegated entirely to
;; aarch64_expand_sve_vcond, which receives the data mode, the comparison
;; mode and the operand array.
3998(define_expand "vcond<mode><v_int_equiv>"
3999 [(set (match_operand:SVE_ALL 0 "register_operand")
4000 (if_then_else:SVE_ALL
4001 (match_operator 3 "comparison_operator"
4002 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
4003 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
d29f7dd5
RS
4004 (match_operand:SVE_ALL 1 "nonmemory_operand")
4005 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
898f07b0
RS
4006 "TARGET_SVE"
4007 {
915d28fe
RS
4008 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
4009 DONE;
898f07b0
RS
4010 }
4011)
4012
915d28fe
RS
4013;; Integer vcondu. Don't enforce an immediate range here, since it
4014;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Same operand layout and the same call to aarch64_expand_sve_vcond as
;; the "vcond<mode><v_int_equiv>" expander; only the pattern name differs.
4015(define_expand "vcondu<mode><v_int_equiv>"
4016 [(set (match_operand:SVE_ALL 0 "register_operand")
4017 (if_then_else:SVE_ALL
4018 (match_operator 3 "comparison_operator"
4019 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
4020 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
d29f7dd5
RS
4021 (match_operand:SVE_ALL 1 "nonmemory_operand")
4022 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
898f07b0 4023 "TARGET_SVE"
915d28fe
RS
4024 {
4025 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
4026 DONE;
4027 }
898f07b0
RS
4028)
4029
915d28fe
RS
4030;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
4031;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
;; The selected values (operands 1 and 2) and the result use SVE_HSD
;; modes, while the compared values (operands 4 and 5) use <V_FP_EQUIV>.
;; Operand 5 may be a register or zero.  Expansion is delegated entirely
;; to aarch64_expand_sve_vcond.
4032(define_expand "vcond<mode><v_fp_equiv>"
a70965b1
RS
4033 [(set (match_operand:SVE_HSD 0 "register_operand")
4034 (if_then_else:SVE_HSD
915d28fe
RS
4035 (match_operator 3 "comparison_operator"
4036 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
4037 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
d29f7dd5
RS
4038 (match_operand:SVE_HSD 1 "nonmemory_operand")
4039 (match_operand:SVE_HSD 2 "nonmemory_operand")))]
b781a135
RS
4040 "TARGET_SVE"
4041 {
915d28fe
RS
4042 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
4043 DONE;
b781a135
RS
4044 }
4045)
4046
915d28fe
RS
4047;; -------------------------------------------------------------------------
4048;; ---- [INT] Comparisons
4049;; -------------------------------------------------------------------------
4050;; Includes merging patterns for:
4051;; - CMPEQ
4052;; - CMPGE
4053;; - CMPGT
4054;; - CMPHI
4055;; - CMPHS
4056;; - CMPLE
4057;; - CMPLO
4058;; - CMPLS
4059;; - CMPLT
4060;; - CMPNE
4061;; -------------------------------------------------------------------------
b781a135 4062
915d28fe
RS
4063;; Signed integer comparisons. Don't enforce an immediate range here, since
4064;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
4065;; instead.
;; Operand 0 is the predicate result, operand 1 the comparison operator
;; (only its rtx code is passed on) and operands 2 and 3 the vectors
;; compared.  The template also clobbers the CC register.
4066(define_expand "vec_cmp<mode><vpred>"
4067 [(parallel
4068 [(set (match_operand:<VPRED> 0 "register_operand")
4069 (match_operator:<VPRED> 1 "comparison_operator"
4070 [(match_operand:SVE_I 2 "register_operand")
4071 (match_operand:SVE_I 3 "nonmemory_operand")]))
4072 (clobber (reg:CC_NZC CC_REGNUM))])]
b781a135 4073 "TARGET_SVE"
915d28fe
RS
4074 {
4075 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
4076 operands[2], operands[3]);
4077 DONE;
4078 }
b781a135
RS
4079)
4080
915d28fe
RS
4081;; Unsigned integer comparisons. Don't enforce an immediate range here, since
4082;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
4083;; instead.
;; Same operand layout and the same call to aarch64_expand_sve_vec_cmp_int
;; as the signed "vec_cmp<mode><vpred>" expander; the comparison code
;; taken from operand 1 is what distinguishes the two.
4084(define_expand "vec_cmpu<mode><vpred>"
4085 [(parallel
4086 [(set (match_operand:<VPRED> 0 "register_operand")
4087 (match_operator:<VPRED> 1 "comparison_operator"
4088 [(match_operand:SVE_I 2 "register_operand")
4089 (match_operand:SVE_I 3 "nonmemory_operand")]))
4090 (clobber (reg:CC_NZC CC_REGNUM))])]
43cacb12
RS
4091 "TARGET_SVE"
4092 {
915d28fe
RS
4093 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
4094 operands[2], operands[3]);
4095 DONE;
43cacb12
RS
4096 }
4097)
4098
00fa90d9
RS
4099;; Predicated integer comparisons.
;; Operand 1 is the governing predicate (printed as %1/z), operand 2 a
;; flag accepted by aarch64_sve_ptrue_flag, and operands 3 and 4 the
;; values compared.  Alternative 0 compares against an immediate
;; (constraint <sve_imm_con>), alternative 1 against a register.
;; The CC register is clobbered.
4100(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
915d28fe
RS
4101 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
4102 (unspec:<VPRED>
4103 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
00fa90d9 4104 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
915d28fe 4105 (SVE_INT_CMP:<VPRED>
00fa90d9
RS
4106 (match_operand:SVE_I 3 "register_operand" "w, w")
4107 (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
4108 UNSPEC_PRED_Z))
915d28fe 4109 (clobber (reg:CC_NZC CC_REGNUM))]
43cacb12
RS
4110 "TARGET_SVE"
4111 "@
00fa90d9
RS
4112 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
4113 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
43cacb12
RS
4114)
4115
00fa90d9
RS
4116;; Predicated integer comparisons in which both the flag and predicate
4117;; results are interesting.
;; Operand 1 (VNx16BI) is the predicate printed in the instruction.
;; Operands 4 and 5 are the predicate and ptrue flag of the UNSPEC_PTEST;
;; operands 6 and 7 are those of the inner UNSPEC_PRED_Z comparison.
;; The insn only matches when aarch64_sve_same_pred_for_ptest_p accepts
;; operands 4 and 6.  The rewrite step then replaces operands 6 and 7 by
;; (copies of) operands 4 and 5 whenever 4 and 6 are not already equal.
4118(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
915d28fe
RS
4119 [(set (reg:CC_NZC CC_REGNUM)
4120 (unspec:CC_NZC
34467289
RS
4121 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
4122 (match_operand 4)
4123 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe 4124 (unspec:<VPRED>
00fa90d9
RS
4125 [(match_operand 6)
4126 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
915d28fe
RS
4127 (SVE_INT_CMP:<VPRED>
4128 (match_operand:SVE_I 2 "register_operand" "w, w")
4129 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
00fa90d9 4130 UNSPEC_PRED_Z)]
34467289 4131 UNSPEC_PTEST))
915d28fe
RS
4132 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
4133 (unspec:<VPRED>
00fa90d9
RS
4134 [(match_dup 6)
4135 (match_dup 7)
915d28fe
RS
4136 (SVE_INT_CMP:<VPRED>
4137 (match_dup 2)
4138 (match_dup 3))]
00fa90d9
RS
4139 UNSPEC_PRED_Z))]
4140 "TARGET_SVE
4141 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
915d28fe
RS
4142 "@
4143 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
4144 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
00fa90d9
RS
4145 "&& !rtx_equal_p (operands[4], operands[6])"
4146 {
4147 operands[6] = copy_rtx (operands[4]);
4148 operands[7] = operands[5];
4149 }
43cacb12
RS
4150)
4151
00fa90d9
RS
4152;; Predicated integer comparisons in which only the flags result is
4153;; interesting.
;; Same UNSPEC_PTEST shape as "*cmp<cmp_op><mode>_cc", but the predicate
;; result is only a scratch (operand 0 is a clobbered match_scratch)
;; rather than a second set.  The match condition and the rewrite of
;; operands 6 and 7 from operands 4 and 5 are the same as in that pattern.
4154(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
915d28fe
RS
4155 [(set (reg:CC_NZC CC_REGNUM)
4156 (unspec:CC_NZC
34467289
RS
4157 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
4158 (match_operand 4)
4159 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe 4160 (unspec:<VPRED>
00fa90d9
RS
4161 [(match_operand 6)
4162 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
915d28fe
RS
4163 (SVE_INT_CMP:<VPRED>
4164 (match_operand:SVE_I 2 "register_operand" "w, w")
4165 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
00fa90d9 4166 UNSPEC_PRED_Z)]
34467289 4167 UNSPEC_PTEST))
915d28fe 4168 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
00fa90d9
RS
4169 "TARGET_SVE
4170 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
43cacb12 4171 "@
915d28fe
RS
4172 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
4173 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
00fa90d9
RS
4174 "&& !rtx_equal_p (operands[4], operands[6])"
4175 {
4176 operands[6] = copy_rtx (operands[4]);
4177 operands[7] = operands[5];
4178 }
43cacb12
RS
4179)
4180
915d28fe
RS
4181;; Predicated integer comparisons, formed by combining a PTRUE-predicated
4182;; comparison with an AND. Split the instruction into its preferred form
00fa90d9
RS
4183;; at the earliest opportunity, in order to get rid of the redundant
4184;; operand 4.
;; Matches (and (comparison under predicate operand 4, flagged
;; SVE_KNOWN_PTRUE) operand 1).  The output template is "#" and the split
;; condition is "&& 1".  The replacement is the same comparison predicated
;; directly on operand 1 and flagged SVE_MAYBE_NOT_PTRUE, so operand 4 no
;; longer appears.
4185(define_insn_and_split "*cmp<cmp_op><mode>_and"
915d28fe 4186 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
00fa90d9
RS
4187 (and:<VPRED>
4188 (unspec:<VPRED>
4189 [(match_operand 4)
4190 (const_int SVE_KNOWN_PTRUE)
4191 (SVE_INT_CMP:<VPRED>
4192 (match_operand:SVE_I 2 "register_operand" "w, w")
4193 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
4194 UNSPEC_PRED_Z)
4195 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
915d28fe
RS
4196 (clobber (reg:CC_NZC CC_REGNUM))]
4197 "TARGET_SVE"
4198 "#"
4199 "&& 1"
4200 [(parallel
4201 [(set (match_dup 0)
00fa90d9
RS
4202 (unspec:<VPRED>
4203 [(match_dup 1)
4204 (const_int SVE_MAYBE_NOT_PTRUE)
4205 (SVE_INT_CMP:<VPRED>
4206 (match_dup 2)
4207 (match_dup 3))]
4208 UNSPEC_PRED_Z))
915d28fe 4209 (clobber (reg:CC_NZC CC_REGNUM))])]
43cacb12
RS
4210)
4211
915d28fe
RS
4212;; -------------------------------------------------------------------------
4213;; ---- [INT] While tests
4214;; -------------------------------------------------------------------------
4215;; Includes:
4216;; - WHILELO
4217;; -------------------------------------------------------------------------
740c1ed7 4218
915d28fe
RS
4219;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
4220;; with the comparison being unsigned.
;; Operands 1 and 2 are general registers or zero ("rZ") in a GPI mode;
;; operand 0 is the predicate result.  The CC register is clobbered.
0b1fe8cf 4221(define_insn "@while_ult<GPI:mode><PRED_ALL:mode>"
915d28fe
RS
4222 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
4223 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
4224 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
4225 UNSPEC_WHILE_LO))
4226 (clobber (reg:CC_NZC CC_REGNUM))]
43cacb12 4227 "TARGET_SVE"
915d28fe 4228 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
43cacb12
RS
4229)
4230
915d28fe 4231;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
34467289
RS
4232;; Handle the case in which both results are useful. The GP operands
4233;; to the PTEST aren't needed, so we allow them to be anything.
915d28fe
RS
;; Operands 3 and 4 are the two predicate operands of the UNSPEC_PTEST and
;; are matched without any predicate or mode.  Neither is printed in the
;; WHILELO.  The rewrite step fires unless both are already constants and
;; replaces them with all-ones constants (CONSTM1_RTX) of VNx16BImode and
;; of the result's predicate mode respectively.
4234(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
4235 [(set (reg:CC_NZC CC_REGNUM)
4236 (unspec:CC_NZC
34467289
RS
4237 [(match_operand 3)
4238 (match_operand 4)
4239 (const_int SVE_KNOWN_PTRUE)
915d28fe 4240 (unspec:PRED_ALL
34467289
RS
4241 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
4242 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
915d28fe 4243 UNSPEC_WHILE_LO)]
34467289 4244 UNSPEC_PTEST))
915d28fe 4245 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
34467289
RS
4246 (unspec:PRED_ALL [(match_dup 1)
4247 (match_dup 2)]
915d28fe 4248 UNSPEC_WHILE_LO))]
43cacb12 4249 "TARGET_SVE"
34467289 4250 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
915d28fe
RS
4251 ;; Force the compiler to drop the unused predicate operand, so that we
4252 ;; don't have an unnecessary PTRUE.
34467289 4253 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
915d28fe 4254 {
34467289
RS
4255 operands[3] = CONSTM1_RTX (VNx16BImode);
4256 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
915d28fe 4257 }
43cacb12
RS
4258)
4259
915d28fe 4260;; -------------------------------------------------------------------------
42b4e87d 4261;; ---- [FP] Direct comparisons
915d28fe
RS
4262;; -------------------------------------------------------------------------
4263;; Includes:
4264;; - FCMEQ
4265;; - FCMGE
4266;; - FCMGT
4267;; - FCMLE
4268;; - FCMLT
4269;; - FCMNE
4270;; - FCMUO
4271;; -------------------------------------------------------------------------
4272
4273;; Floating-point comparisons. All comparisons except FCMUO allow a zero
4274;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
4275;; with zero.
;; Operand 0 is the predicate result, operand 1 the comparison operator
;; (only its rtx code is passed on), operand 2 a register and operand 3 a
;; register or zero.
;; NOTE(review): the final "false" argument is interpreted by
;; aarch64_expand_sve_vec_cmp_float, which is not visible here -- confirm
;; its meaning there.
4276(define_expand "vec_cmp<mode><vpred>"
4277 [(set (match_operand:<VPRED> 0 "register_operand")
4278 (match_operator:<VPRED> 1 "comparison_operator"
4279 [(match_operand:SVE_F 2 "register_operand")
4280 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
43cacb12
RS
4281 "TARGET_SVE"
4282 {
915d28fe
RS
4283 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
4284 operands[2], operands[3], false);
4285 DONE;
43cacb12
RS
4286 }
4287)
4288
4a942af6 4289;; Predicated floating-point comparisons.
915d28fe
RS
;; Operand 1 is the governing predicate (printed as %1/z) and operand 4 a
;; flag accepted by aarch64_sve_ptrue_flag.  Alternative 0 compares
;; operand 2 with a literal #0.0 (operand 3 is "Dz"); alternative 1
;; compares it with register operand 3.
4290(define_insn "*fcm<cmp_op><mode>"
4291 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
4292 (unspec:<VPRED>
4293 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4a942af6
RS
4294 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
4295 (match_operand:SVE_F 2 "register_operand" "w, w")
4296 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
4297 SVE_COND_FP_CMP_I0))]
43cacb12
RS
4298 "TARGET_SVE"
4299 "@
915d28fe
RS
4300 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
4301 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
4302)
4303
915d28fe
RS
4304;; Same for unordered comparisons.
;; Unlike "*fcm<cmp_op><mode>", there is a single alternative and operand 3
;; must be a register; no zero-immediate form is provided.
4305(define_insn "*fcmuo<mode>"
4306 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
4307 (unspec:<VPRED>
4308 [(match_operand:<VPRED> 1 "register_operand" "Upl")
4a942af6
RS
4309 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
4310 (match_operand:SVE_F 2 "register_operand" "w")
4311 (match_operand:SVE_F 3 "register_operand" "w")]
4312 UNSPEC_COND_FCMUO))]
43cacb12 4313 "TARGET_SVE"
915d28fe 4314 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
4315)
4316
915d28fe
RS
4317;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
4318;; with another predicate P. This does not have the same trapping behavior
4319;; as predicating the comparison itself on P, but it's a legitimate fold,
4320;; since we can drop any potentially-trapping operations whose results
4321;; are not needed.
4322;;
4323;; Split the instruction into its preferred form (below) at the earliest
4324;; opportunity, in order to get rid of the redundant operand 1.
;; Operand 1 is the predicate of the original comparison (flagged
;; SVE_KNOWN_PTRUE) and operand 4 the predicate it is ANDed with.
;; The output template is "#" and the split condition "&& 1".  The
;; replacement is the same comparison predicated on operand 4 and flagged
;; SVE_MAYBE_NOT_PTRUE.
4325(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
4326 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
4327 (and:<VPRED>
4328 (unspec:<VPRED>
4329 [(match_operand:<VPRED> 1)
4a942af6
RS
4330 (const_int SVE_KNOWN_PTRUE)
4331 (match_operand:SVE_F 2 "register_operand" "w, w")
4332 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
4333 SVE_COND_FP_CMP_I0)
915d28fe 4334 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
43cacb12 4335 "TARGET_SVE"
915d28fe
RS
4336 "#"
4337 "&& 1"
4338 [(set (match_dup 0)
4a942af6
RS
4339 (unspec:<VPRED>
4340 [(match_dup 4)
4341 (const_int SVE_MAYBE_NOT_PTRUE)
4342 (match_dup 2)
4343 (match_dup 3)]
4344 SVE_COND_FP_CMP_I0))]
43cacb12
RS
4345)
4346
915d28fe
RS
4347;; Same for unordered comparisons.
;; Single-alternative UNSPEC_COND_FCMUO version: operand 3 must be a
;; register.  The split re-predicates the comparison on operand 4 with
;; SVE_MAYBE_NOT_PTRUE and drops operand 1.
4348(define_insn_and_split "*fcmuo<mode>_and_combine"
4349 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
4350 (and:<VPRED>
4351 (unspec:<VPRED>
4352 [(match_operand:<VPRED> 1)
4a942af6
RS
4353 (const_int SVE_KNOWN_PTRUE)
4354 (match_operand:SVE_F 2 "register_operand" "w")
4355 (match_operand:SVE_F 3 "register_operand" "w")]
4356 UNSPEC_COND_FCMUO)
915d28fe 4357 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
43cacb12 4358 "TARGET_SVE"
915d28fe
RS
4359 "#"
4360 "&& 1"
4361 [(set (match_dup 0)
915d28fe 4362 (unspec:<VPRED>
4a942af6
RS
4363 [(match_dup 4)
4364 (const_int SVE_MAYBE_NOT_PTRUE)
4365 (match_dup 2)
4366 (match_dup 3)]
4367 UNSPEC_COND_FCMUO))]
43cacb12
RS
4368)
4369
42b4e87d
RS
4370;; -------------------------------------------------------------------------
4371;; ---- [FP] Absolute comparisons
4372;; -------------------------------------------------------------------------
4373;; Includes:
4374;; - FACGE
4375;; - FACGT
4376;; - FACLE
4377;; - FACLT
4378;; -------------------------------------------------------------------------
4379
4380;; Predicated floating-point absolute comparisons.
;; Matches a comparison whose two inputs are each an UNSPEC_COND_FABS of a
;; register (operands 2 and 3).  Operands 5 and 7 are the predicates of
;; those FABS operations and operands 6 and 8 their GP-strictness flags.
;; The insn requires aarch64_sve_pred_dominates_p to hold for both FABS
;; predicates against operand 1.  The rewrite step replaces operands 5 and
;; 7 with copies of operand 1 when either differs from it.
4381(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
4382 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
4383 (unspec:<VPRED>
4384 [(match_operand:<VPRED> 1 "register_operand" "Upl")
4385 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
4386 (unspec:SVE_F
4387 [(match_operand 5)
4388 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4389 (match_operand:SVE_F 2 "register_operand" "w")]
4390 UNSPEC_COND_FABS)
4391 (unspec:SVE_F
4392 [(match_operand 7)
4393 (match_operand:SI 8 "aarch64_sve_gp_strictness")
4394 (match_operand:SVE_F 3 "register_operand" "w")]
4395 UNSPEC_COND_FABS)]
4396 SVE_COND_FP_ABS_CMP))]
4397 "TARGET_SVE
4398 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
4399 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
4400 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
4401 "&& (!rtx_equal_p (operands[1], operands[5])
4402 || !rtx_equal_p (operands[1], operands[7]))"
4403 {
4404 operands[5] = copy_rtx (operands[1]);
4405 operands[7] = copy_rtx (operands[1]);
4406 }
4407)
4408
915d28fe
RS
4409;; -------------------------------------------------------------------------
4410;; ---- [PRED] Test bits
4411;; -------------------------------------------------------------------------
4412;; Includes:
4413;; - PTEST
4414;; -------------------------------------------------------------------------
4415
4416;; Branch based on predicate equality or inequality.
;; Steps performed by the preparation code:
;;  - materialise an all-true VNx16BI predicate and view it in <MODE>mode;
;;  - if operand 2 is the zero constant, test operand 1 directly;
;;    otherwise XOR operands 1 and 2 (zeroing form, under the all-true
;;    predicate) into a fresh register and test that;
;;  - emit a PTEST of the chosen predicate with flag SVE_KNOWN_PTRUE;
;;  - rewrite operands 1 and 2 so that the branch template compares the
;;    CC register with zero using operator 0.
;; NOTE(review): the expander condition is the empty string, whereas the
;; neighbouring patterns use "TARGET_SVE" -- confirm this is intentional.
4417(define_expand "cbranch<mode>4"
4418 [(set (pc)
4419 (if_then_else
4420 (match_operator 0 "aarch64_equality_operator"
4421 [(match_operand:PRED_ALL 1 "register_operand")
4422 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
4423 (label_ref (match_operand 3 ""))
4424 (pc)))]
4425 ""
43cacb12 4426 {
34467289
RS
4427 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
4428 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
4429 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
915d28fe
RS
4430 rtx pred;
4431 if (operands[2] == CONST0_RTX (<MODE>mode))
4432 pred = operands[1];
4433 else
4434 {
4435 pred = gen_reg_rtx (<MODE>mode);
34467289
RS
4436 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
4437 operands[2]));
915d28fe 4438 }
34467289 4439 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
915d28fe
RS
4440 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
4441 operands[2] = const0_rtx;
43cacb12
RS
4442 }
4443)
4444
34467289
RS
4445;; See "Description of UNSPEC_PTEST" above for details.
;; Only operand 0 (the VNx16BI governing predicate) and operand 3 (the
;; predicate being tested, always printed with a .b suffix) appear in the
;; assembly.  Operands 1 and 2 are part of the RTL but are not printed.
4446(define_insn "aarch64_ptest<mode>"
915d28fe 4447 [(set (reg:CC_NZC CC_REGNUM)
34467289
RS
4448 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
4449 (match_operand 1)
4450 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
4451 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
4452 UNSPEC_PTEST))]
43cacb12 4453 "TARGET_SVE"
34467289 4454 "ptest\t%0, %3.b"
43cacb12
RS
4455)
4456
915d28fe
RS
4457;; =========================================================================
4458;; == Reductions
4459;; =========================================================================
4460
4461;; -------------------------------------------------------------------------
4462;; ---- [INT,FP] Conditional reductions
4463;; -------------------------------------------------------------------------
4464;; Includes:
4465;; - CLASTB
4466;; -------------------------------------------------------------------------
4467
4468;; Set operand 0 to the last active element in operand 3, or to tied
4469;; operand 1 if no elements are active.
;; Operand 2 is the predicate.  Alternative 0 keeps the scalar in a
;; general register (printed with %<vwcore>0 and disparaged with "?");
;; alternative 1 keeps it in an FP/SIMD register (printed with
;; %<Vetype>0).  In both, operand 1 is tied to the output.
4470(define_insn "fold_extract_last_<mode>"
801790b3 4471 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
915d28fe
RS
4472 (unspec:<VEL>
4473 [(match_operand:<VEL> 1 "register_operand" "0, 0")
4474 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
4475 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
4476 UNSPEC_CLASTB))]
3db85990 4477 "TARGET_SVE"
915d28fe
RS
4478 "@
4479 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
801790b3 4480 clastb\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
3db85990
ST
4481)
4482
915d28fe
RS
4483;; -------------------------------------------------------------------------
4484;; ---- [INT] Tree reductions
4485;; -------------------------------------------------------------------------
4486;; Includes:
4487;; - ANDV
4488;; - EORV
4489;; - ORV
4490;; - SMAXV
4491;; - SMINV
4492;; - UADDV
4493;; - UMAXV
4494;; - UMINV
4495;; -------------------------------------------------------------------------
4496
4497;; Unpredicated integer add reduction.
;; Operand 0 is the scalar result and operand 1 the input vector.
;; Operand 2 (the match_dup) is filled in by the preparation code with
;; aarch64_ptrue_reg (<VPRED>mode) and becomes the predicate of the
;; UNSPEC_ADDV.
4498(define_expand "reduc_plus_scal_<mode>"
4499 [(set (match_operand:<VEL> 0 "register_operand")
4500 (unspec:<VEL> [(match_dup 2)
4501 (match_operand:SVE_I 1 "register_operand")]
4502 UNSPEC_ADDV))]
43cacb12
RS
4503 "TARGET_SVE"
4504 {
16de3637 4505 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
4506 }
4507)
4508
915d28fe
RS
4509;; Predicated integer add reduction. The result is always 64 bits.
;; Operand 1 is the governing predicate and operand 2 the input vector.
;; The destination is printed with the %d0 modifier regardless of the
;; element type, whereas the source uses the <Vetype> suffix.
4510(define_insn "*reduc_plus_scal_<mode>"
4511 [(set (match_operand:<VEL> 0 "register_operand" "=w")
4512 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
4513 (match_operand:SVE_I 2 "register_operand" "w")]
4514 UNSPEC_ADDV))]
43cacb12 4515 "TARGET_SVE"
915d28fe 4516 "uaddv\t%d0, %1, %2.<Vetype>"
43cacb12
RS
4517)
4518
b0760a40 4519;; Unpredicated integer reductions.
915d28fe
RS
;; Operand 0 is the scalar result and operand 1 the integer input vector.
;; Operand 2 (the match_dup) is set by the preparation code to
;; aarch64_ptrue_reg (<VPRED>mode) and is the predicate of the
;; SVE_INT_REDUCTION unspec.
4520(define_expand "reduc_<optab>_scal_<mode>"
4521 [(set (match_operand:<VEL> 0 "register_operand")
4522 (unspec:<VEL> [(match_dup 2)
4523 (match_operand:SVE_I 1 "register_operand")]
b0760a40 4524 SVE_INT_REDUCTION))]
43cacb12 4525 "TARGET_SVE"
915d28fe
RS
4526 {
4527 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
4528 }
43cacb12
RS
4529)
4530
b0760a40 4531;; Predicated integer reductions.
915d28fe
RS
;; Operand 1 is the governing predicate and operand 2 the input vector.
;; The mnemonic comes from <sve_int_op>, and the scalar destination is
;; printed with the element-sized %<Vetype>0 form.
4532(define_insn "*reduc_<optab>_scal_<mode>"
4533 [(set (match_operand:<VEL> 0 "register_operand" "=w")
4534 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
4535 (match_operand:SVE_I 2 "register_operand" "w")]
b0760a40 4536 SVE_INT_REDUCTION))]
43cacb12 4537 "TARGET_SVE"
b0760a40 4538 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
4539)
4540
915d28fe
RS
4541;; -------------------------------------------------------------------------
4542;; ---- [FP] Tree reductions
4543;; -------------------------------------------------------------------------
4544;; Includes:
4545;; - FADDV
4546;; - FMAXNMV
4547;; - FMAXV
4548;; - FMINNMV
4549;; - FMINV
4550;; -------------------------------------------------------------------------
4551
b0760a40
RS
4552;; Unpredicated floating-point tree reductions.
;; Operand 0 is the scalar result and operand 1 the FP input vector.
;; Operand 2 (the match_dup) is set by the preparation code to
;; aarch64_ptrue_reg (<VPRED>mode) and is the predicate of the
;; SVE_FP_REDUCTION unspec.
4553(define_expand "reduc_<optab>_scal_<mode>"
915d28fe
RS
4554 [(set (match_operand:<VEL> 0 "register_operand")
4555 (unspec:<VEL> [(match_dup 2)
4556 (match_operand:SVE_F 1 "register_operand")]
b0760a40 4557 SVE_FP_REDUCTION))]
43cacb12 4558 "TARGET_SVE"
915d28fe
RS
4559 {
4560 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
4561 }
43cacb12
RS
4562)
4563
b0760a40
RS
4564;; Predicated floating-point tree reductions.
;; Operand 1 is the governing predicate and operand 2 the input vector.
;; The mnemonic comes from <sve_fp_op>, and the scalar destination is
;; printed with the element-sized %<Vetype>0 form.
4565(define_insn "*reduc_<optab>_scal_<mode>"
915d28fe
RS
4566 [(set (match_operand:<VEL> 0 "register_operand" "=w")
4567 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
4568 (match_operand:SVE_F 2 "register_operand" "w")]
b0760a40 4569 SVE_FP_REDUCTION))]
43cacb12 4570 "TARGET_SVE"
b0760a40 4571 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
4572)
4573
915d28fe
RS
4574;; -------------------------------------------------------------------------
4575;; ---- [FP] Left-to-right reductions
4576;; -------------------------------------------------------------------------
4577;; Includes:
4578;; - FADDA
4579;; -------------------------------------------------------------------------
4580
4581;; Unpredicated in-order FP reductions.
;; Operand 1 is the scalar input of the reduction and operand 2 the
;; vector.  Operand 3 (the match_dup) is set by the preparation code to
;; aarch64_ptrue_reg (<VPRED>mode), giving the same UNSPEC_FADDA shape as
;; "mask_fold_left_plus_<mode>".
4582(define_expand "fold_left_plus_<mode>"
4583 [(set (match_operand:<VEL> 0 "register_operand")
4584 (unspec:<VEL> [(match_dup 3)
4585 (match_operand:<VEL> 1 "register_operand")
4586 (match_operand:SVE_F 2 "register_operand")]
4587 UNSPEC_FADDA))]
43cacb12 4588 "TARGET_SVE"
915d28fe
RS
4589 {
4590 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4591 }
43cacb12
RS
4592)
4593
915d28fe
RS
4594;; Predicated in-order FP reductions.
;; Operand 3 is the governing predicate, operand 1 the scalar input (tied
;; to the output by the "0" constraint, so it is printed as %<Vetype>0)
;; and operand 2 the vector.
4595(define_insn "mask_fold_left_plus_<mode>"
4596 [(set (match_operand:<VEL> 0 "register_operand" "=w")
4597 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
4598 (match_operand:<VEL> 1 "register_operand" "0")
4599 (match_operand:SVE_F 2 "register_operand" "w")]
4600 UNSPEC_FADDA))]
43cacb12 4601 "TARGET_SVE"
915d28fe 4602 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
43cacb12
RS
4603)
4604
915d28fe
RS
4605;; =========================================================================
4606;; == Permutes
4607;; =========================================================================
4608
4609;; -------------------------------------------------------------------------
4610;; ---- [INT,FP] General permutes
4611;; -------------------------------------------------------------------------
4612;; Includes:
4613;; - TBL
4614;; -------------------------------------------------------------------------
4615
;; General two-input permute.  Operand 0 is the result, operands 1 and 2
;; the input vectors and operand 3 the selector (mode <V_INT_EQUIV>).
;; Only enabled when the number of units in <MODE>mode is a compile-time
;; constant.  All work is done by aarch64_expand_sve_vec_perm.
4616(define_expand "vec_perm<mode>"
4617 [(match_operand:SVE_ALL 0 "register_operand")
4618 (match_operand:SVE_ALL 1 "register_operand")
4619 (match_operand:SVE_ALL 2 "register_operand")
4620 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
4621 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
9bfb28ed 4622 {
915d28fe
RS
4623 aarch64_expand_sve_vec_perm (operands[0], operands[1],
4624 operands[2], operands[3]);
9bfb28ed
RS
4625 DONE;
4626 }
4627)
4628
915d28fe
RS
;; Single-vector TBL.  Operand 1 is the data vector and operand 2 the
;; index vector, which has the integer mode <V_INT_EQUIV>.
4629(define_insn "*aarch64_sve_tbl<mode>"
4630 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
4631 (unspec:SVE_ALL
4632 [(match_operand:SVE_ALL 1 "register_operand" "w")
4633 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4634 UNSPEC_TBL))]
43cacb12 4635 "TARGET_SVE"
915d28fe 4636 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
43cacb12
RS
4637)
4638
915d28fe
RS
4639;; -------------------------------------------------------------------------
4640;; ---- [INT,FP] Special-purpose unary permutes
4641;; -------------------------------------------------------------------------
4642;; Includes:
4643;; - DUP
4644;; - REV
915d28fe
RS
4645;; -------------------------------------------------------------------------
4646
4647;; Duplicate one element of a vector.
;; Operand 2 is a constant lane index.  The insn only matches when the
;; byte offset of that lane (index times the element size) lies in the
;; range [0, 63].
4648(define_insn "*aarch64_sve_dup_lane<mode>"
4649 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
4650 (vec_duplicate:SVE_ALL
4651 (vec_select:<VEL>
4652 (match_operand:SVE_ALL 1 "register_operand" "w")
4653 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
4654 "TARGET_SVE
4655 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
4656 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
4657)
4658
4659;; Reverse the order of elements within a full vector.
;; Unpredicated: operand 1 is the only input and is wrapped in UNSPEC_REV.
4660(define_insn "@aarch64_sve_rev<mode>"
4661 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
4662 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
4663 UNSPEC_REV))]
9bfb28ed 4664 "TARGET_SVE"
915d28fe
RS
4665 "rev\t%0.<Vetype>, %1.<Vetype>")
4666
915d28fe
RS
4667;; -------------------------------------------------------------------------
4668;; ---- [INT,FP] Special-purpose binary permutes
4669;; -------------------------------------------------------------------------
4670;; Includes:
4671;; - TRN1
4672;; - TRN2
4673;; - UZP1
4674;; - UZP2
4675;; - ZIP1
4676;; - ZIP2
4677;; -------------------------------------------------------------------------
4678
4679;; Permutes that take half the elements from one vector and half the
4680;; elements from the other.
;; Data-vector form: one pattern per member of the PERMUTE iterator, with
;; the mnemonic taken from <perm_insn>.  All three registers use the same
;; element suffix.
3e2751ce 4681(define_insn "aarch64_sve_<perm_insn><mode>"
915d28fe
RS
4682 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
4683 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
4684 (match_operand:SVE_ALL 2 "register_operand" "w")]
4685 PERMUTE))]
9bfb28ed 4686 "TARGET_SVE"
3e2751ce 4687 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
915d28fe
RS
4688)
4689
4690;; Concatenate two vectors and extract a subvector. Note that the
4691;; immediate (third) operand is the lane index not the byte index.
;; The insn only matches when the byte offset (lane index times element
;; size) is in [0, 255].  The output code rewrites operand 3 in place from
;; a lane index to that byte offset before printing, and the EXT itself
;; always uses .b registers.  Alternative 0 ties operand 1 to the output;
;; alternative 1 first copies operand 1 into the output with MOVPRFX.
;; NOTE(review): the templates mix "\t" and "\\t" spellings; presumably
;; both end up as a tab in the output -- confirm before normalising.
4692(define_insn "*aarch64_sve_ext<mode>"
06b3ba23
RS
4693 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
4694 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0, w")
4695 (match_operand:SVE_ALL 2 "register_operand" "w, w")
915d28fe
RS
4696 (match_operand:SI 3 "const_int_operand")]
4697 UNSPEC_EXT))]
4698 "TARGET_SVE
4699 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
9bfb28ed 4700 {
915d28fe 4701 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
06b3ba23
RS
4702 return (which_alternative == 0
4703 ? "ext\\t%0.b, %0.b, %2.b, #%3"
4704 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
43cacb12 4705 }
06b3ba23 4706 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
4707)
4708
915d28fe
RS
4709;; -------------------------------------------------------------------------
4710;; ---- [PRED] Special-purpose binary permutes
4711;; -------------------------------------------------------------------------
4712;; Includes:
4713;; - TRN1
4714;; - TRN2
4715;; - UZP1
4716;; - UZP2
4717;; - ZIP1
4718;; - ZIP2
4719;; -------------------------------------------------------------------------
4720
4721;; Permutes that take half the elements from one vector and half the
4722;; elements from the other.
;; Predicate form: operands are PRED_ALL registers (constraint "Upa"),
;; with the mnemonic taken from <perm_insn> for each member of PERMUTE.
2803bc3b 4723(define_insn "@aarch64_sve_<perm_insn><mode>"
915d28fe
RS
4724 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
4725 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
4726 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
4727 PERMUTE))]
43cacb12 4728 "TARGET_SVE"
3e2751ce 4729 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
43cacb12
RS
4730)
4731
915d28fe
RS
4732;; =========================================================================
4733;; == Conversions
4734;; =========================================================================
4735
4736;; -------------------------------------------------------------------------
4737;; ---- [INT<-INT] Packs
4738;; -------------------------------------------------------------------------
4739;; Includes:
4740;; - UZP1
4741;; -------------------------------------------------------------------------
4742
43cacb12
RS
4743;; Integer pack. Use UZP1 on the narrower type, which discards
4744;; the high part of each wide element.
;; Operands 1 and 2 have the wide mode <VWIDE>, while the result has the
;; narrower SVE_BHSI mode.  All three registers are printed with the
;; <Vetype> suffix of the result mode.
4745(define_insn "vec_pack_trunc_<Vwide>"
4746 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
4747 (unspec:SVE_BHSI
4748 [(match_operand:<VWIDE> 1 "register_operand" "w")
4749 (match_operand:<VWIDE> 2 "register_operand" "w")]
4750 UNSPEC_PACK))]
4751 "TARGET_SVE"
4752 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4753)
4754
915d28fe
RS
4755;; -------------------------------------------------------------------------
4756;; ---- [INT<-INT] Unpacks
4757;; -------------------------------------------------------------------------
4758;; Includes:
4759;; - SUNPKHI
4760;; - SUNPKLO
4761;; - UUNPKHI
4762;; - UUNPKLO
4763;; -------------------------------------------------------------------------
4764
4765;; Unpack the low or high half of a vector, where "high" refers to
4766;; the low-numbered lanes for big-endian and the high-numbered lanes
4767;; for little-endian.
;;
;; The preparation statements do all the work: they choose between the
;; <su>unpkhi and <su>unpklo generators according to <hi_lanes_optab>,
;; emit that one instruction with operands 0 and 1, and finish with DONE.
;; Operand 1 is the SVE_BHSI input and operand 0 the <VWIDE> result.
4768(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
4769 [(match_operand:<VWIDE> 0 "register_operand")
4770 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
43cacb12
RS
4771 "TARGET_SVE"
4772 {
915d28fe
RS
4773 emit_insn ((<hi_lanes_optab>
4774 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
4775 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
4776 (operands[0], operands[1]));
4777 DONE;
4778 }
4779)
4780
;; Single-instruction form of the integer unpack.  Operand 1 is the
;; SVE_BHSI input and operand 0 is the <VWIDE> result.  The destination
;; is printed with the wide element suffix <Vewtype> and the source with
;; <Vetype>; <su> and <perm_hilo> are substituted into both the pattern
;; name and the mnemonic.
4781(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
4782 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4783 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
4784 UNPACK))]
4785 "TARGET_SVE"
4786 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
4787)
4788
4789;; -------------------------------------------------------------------------
4790;; ---- [INT<-FP] Conversions
4791;; -------------------------------------------------------------------------
4792;; Includes:
4793;; - FCVTZS
4794;; - FCVTZU
4795;; -------------------------------------------------------------------------
4796
4797;; Unpredicated conversion of floats to integers of the same size (HF to HI,
4798;; SF to SI or DF to DI).
;;
;; The named pattern only has operands 0 (<V_INT_EQUIV> result) and 1
;; (SVE_F input).  The predicate needed by the SVE_COND_FCVTI unspec is
;; supplied here as operand 2, which the preparation statements set to
;; the register returned by aarch64_ptrue_reg for <VPRED>mode.  The
;; strictness field is the constant SVE_RELAXED_GP.
99361551 4799(define_expand "<optab><mode><v_int_equiv>2"
915d28fe
RS
4800 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
4801 (unspec:<V_INT_EQUIV>
4802 [(match_dup 2)
99361551
RS
4803 (const_int SVE_RELAXED_GP)
4804 (match_operand:SVE_F 1 "register_operand")]
4805 SVE_COND_FCVTI))]
915d28fe
RS
4806 "TARGET_SVE"
4807 {
4808 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
4809 }
4810)
4811
95eb5537
RS
4812;; Predicated float-to-integer conversion, either to the same width or wider.
;;
;; Operand 1 is the governing predicate (printed as %1/m), operand 2 the
;; SVE_F input and operand 0 the SVE_HSDI result.  Operand 3 is the GP
;; strictness value; it is matched but never printed.  The pattern is
;; iterated over both SVE_F and SVE_HSDI, and the insn condition keeps
;; only the combinations in which the integer elements are at least as
;; wide as the floating-point elements.
4813(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_F:mode><SVE_HSDI:mode>"
915d28fe
RS
4814 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
4815 (unspec:SVE_HSDI
95eb5537 4816 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
99361551 4817 (match_operand:SI 3 "aarch64_sve_gp_strictness")
95eb5537 4818 (match_operand:SVE_F 2 "register_operand" "w")]
99361551 4819 SVE_COND_FCVTI))]
95eb5537
RS
4820 "TARGET_SVE && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits>"
4821 "fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype>"
915d28fe
RS
4822)
4823
95eb5537
RS
4824;; Predicated narrowing float-to-integer conversion.
;;
;; Only instantiated for a VNx2DF_ONLY input (operand 2) and a
;; VNx4SI_ONLY result (operand 0).  The governing predicate in operand 1
;; has mode VNx2BI (compare the VNx2DF input) rather than a VNx4
;; predicate mode.  Operand 3 is the GP strictness value and is not
;; printed.
4825(define_insn "*aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
4826 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
4827 (unspec:VNx4SI_ONLY
915d28fe 4828 [(match_operand:VNx2BI 1 "register_operand" "Upl")
99361551 4829 (match_operand:SI 3 "aarch64_sve_gp_strictness")
95eb5537 4830 (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
99361551 4831 SVE_COND_FCVTI))]
915d28fe 4832 "TARGET_SVE"
95eb5537 4833 "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
915d28fe
RS
4834)
4835
c5e16983
RS
4836;; Predicated float-to-integer conversion with merging, either to the same
4837;; width or wider.
4838;;
4839;; The first alternative doesn't need the earlyclobber, but the only case
4840;; it would help is the uninteresting one in which operands 2 and 3 are
4841;; the same register (despite having different modes). Making all the
4842;; alternatives earlyclobber makes things more consistent for the
4843;; register allocator.
;;
;; Operand roles:
;;   1 - predicate of the outer UNSPEC_SEL, printed as the governing
;;       predicate of the conversion (%1/m)
;;   2 - SVE_F input of the conversion
;;   3 - fallback value for the UNSPEC_SEL; the three alternatives are
;;       tied to the destination (0), zero (Dz) and any other register (w)
;;   4 - predicate of the inner conversion unspec
;;   5 - GP strictness value of the inner conversion (not printed)
;;
;; The Dz and w alternatives are emitted as a MOVPRFX followed by the
;; conversion, which is what the movprfx attribute records.
;;
;; The insn only matches when aarch64_sve_pred_dominates_p accepts
;; operand 4 against operand 1.  The rewrite step then replaces operand 4
;; with a copy of operand 1 whenever the two are not already rtx_equal_p.
4844(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_F:mode><SVE_HSDI:mode>"
4845 [(set (match_operand:SVE_HSDI 0 "register_operand" "=&w, &w, ?&w")
4846 (unspec:SVE_HSDI
4847 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
4848 (unspec:SVE_HSDI
4849 [(match_operand 4)
4850 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4851 (match_operand:SVE_F 2 "register_operand" "w, w, w")]
4852 SVE_COND_FCVTI)
4853 (match_operand:SVE_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
4854 UNSPEC_SEL))]
4855 "TARGET_SVE
4856 && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits>
4857 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4858 "@
4859 fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype>
4860 movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype>
4861 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_F:Vetype>"
4862 "&& !rtx_equal_p (operands[1], operands[4])"
4863 {
4864 operands[4] = copy_rtx (operands[1]);
4865 }
4866 [(set_attr "movprfx" "*,yes,yes")]
4867)
4868
915d28fe
RS
4869;; -------------------------------------------------------------------------
4870;; ---- [INT<-FP] Packs
4871;; -------------------------------------------------------------------------
4872;; The patterns in this section are synthetic.
4873;; -------------------------------------------------------------------------
4874
43cacb12
RS
4875;; Convert two vectors of DF to SI and pack the results into a single vector.
;;
;; The template expands to three sets:
;;   - operand 1 (VNx2DF) is converted into operand 4
;;   - operand 2 (VNx2DF) is converted into operand 5
;;   - operands 4 and 5 are combined with UNSPEC_UZP1 to give operand 0
;; Operands 4 and 5 are fresh VNx4SI pseudos and operand 3 is the
;; VNx2BI register returned by aarch64_ptrue_reg; all three are created
;; in the preparation statements.  Both conversions use SVE_RELAXED_GP.
4876(define_expand "vec_pack_<su>fix_trunc_vnx2df"
4877 [(set (match_dup 4)
4878 (unspec:VNx4SI
4879 [(match_dup 3)
99361551
RS
4880 (const_int SVE_RELAXED_GP)
4881 (match_operand:VNx2DF 1 "register_operand")]
4882 SVE_COND_FCVTI))
43cacb12
RS
4883 (set (match_dup 5)
4884 (unspec:VNx4SI
4885 [(match_dup 3)
99361551
RS
4886 (const_int SVE_RELAXED_GP)
4887 (match_operand:VNx2DF 2 "register_operand")]
4888 SVE_COND_FCVTI))
43cacb12
RS
4889 (set (match_operand:VNx4SI 0 "register_operand")
4890 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
4891 "TARGET_SVE"
4892 {
16de3637 4893 operands[3] = aarch64_ptrue_reg (VNx2BImode);
43cacb12
RS
4894 operands[4] = gen_reg_rtx (VNx4SImode);
4895 operands[5] = gen_reg_rtx (VNx4SImode);
4896 }
4897)
f1739b48 4898
915d28fe
RS
4899;; -------------------------------------------------------------------------
4900;; ---- [INT<-FP] Unpacks
4901;; -------------------------------------------------------------------------
4902;; No patterns here yet!
4903;; -------------------------------------------------------------------------
9d4ac06e 4904
915d28fe
RS
4905;; -------------------------------------------------------------------------
4906;; ---- [FP<-INT] Conversions
4907;; -------------------------------------------------------------------------
4908;; Includes:
4909;; - SCVTF
4910;; - UCVTF
4911;; -------------------------------------------------------------------------
a08acce8 4912
915d28fe
RS
4913;; Unpredicated conversion of integers to floats of the same size
4914;; (HI to HF, SI to SF or DI to DF).
;;
;; The named pattern only has operands 0 (SVE_F result) and 1
;; (<V_INT_EQUIV> input).  The predicate needed by the SVE_COND_ICVTF
;; unspec is supplied here as operand 2, which the preparation statements
;; set to the register returned by aarch64_ptrue_reg for <VPRED>mode.
;; The strictness field is the constant SVE_RELAXED_GP.
4915(define_expand "<optab><v_int_equiv><mode>2"
4916 [(set (match_operand:SVE_F 0 "register_operand")
a08acce8 4917 (unspec:SVE_F
915d28fe 4918 [(match_dup 2)
99361551
RS
4919 (const_int SVE_RELAXED_GP)
4920 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
4921 SVE_COND_ICVTF))]
a08acce8 4922 "TARGET_SVE"
f4fde1b3 4923 {
915d28fe 4924 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
f4fde1b3 4925 }
b41d1f6e
RS
4926)
4927
95eb5537
RS
4928;; Predicated integer-to-float conversion, either to the same width or
4929;; narrower.
;;
;; Operand 1 is the governing predicate (printed as %1/m) and uses the
;; predicate mode of the SVE_HSDI input, operand 2 is that integer input
;; and operand 0 the SVE_F result.  Operand 3 is the GP strictness value;
;; it is matched but never printed.  The insn condition keeps only the
;; mode combinations in which the integer elements are at least as wide
;; as the floating-point elements.
4930(define_insn "*aarch64_sve_<optab>_nonextend<SVE_HSDI:mode><SVE_F:mode>"
4931 [(set (match_operand:SVE_F 0 "register_operand" "=w")
4932 (unspec:SVE_F
4933 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
99361551
RS
4934 (match_operand:SI 3 "aarch64_sve_gp_strictness")
4935 (match_operand:SVE_HSDI 2 "register_operand" "w")]
4936 SVE_COND_ICVTF))]
95eb5537
RS
4937 "TARGET_SVE && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits>"
4938 "<su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
f1739b48 4939)
6c9c7b73 4940
95eb5537
RS
4941;; Predicated widening integer-to-float conversion.
;;
;; Only instantiated for a VNx4SI_ONLY input (operand 2) and a
;; VNx2DF_ONLY result (operand 0), with a VNx2BI governing predicate in
;; operand 1.  Operand 3 is the GP strictness value and is not printed.
;; The name has no leading asterisk because the
;; vec_unpack<su_optab>_float_<perm_hilo>_vnx4si expander in this file
;; calls this pattern's generator directly.
4942(define_insn "aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
4943 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
4944 (unspec:VNx2DF_ONLY
915d28fe 4945 [(match_operand:VNx2BI 1 "register_operand" "Upl")
99361551 4946 (match_operand:SI 3 "aarch64_sve_gp_strictness")
95eb5537 4947 (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
99361551 4948 SVE_COND_ICVTF))]
6c9c7b73 4949 "TARGET_SVE"
95eb5537 4950 "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
915d28fe 4951)
6c9c7b73 4952
c5e16983
RS
4953;; Predicated integer-to-float conversion with merging, either to the same
4954;; width or narrower.
4955;;
4956;; The first alternative doesn't need the earlyclobber, but the only case
4957;; it would help is the uninteresting one in which operands 2 and 3 are
4958;; the same register (despite having different modes). Making all the
4959;; alternatives earlyclobber makes things more consistent for the
4960;; register allocator.
;;
;; Operand roles:
;;   1 - predicate of the outer UNSPEC_SEL, printed as the governing
;;       predicate of the conversion (%1/m)
;;   2 - SVE_HSDI input of the conversion
;;   3 - fallback value for the UNSPEC_SEL; the three alternatives are
;;       tied to the destination (0), zero (Dz) and any other register (w)
;;   4 - predicate of the inner conversion unspec
;;   5 - GP strictness value of the inner conversion (not printed)
;;
;; The Dz and w alternatives are emitted as a MOVPRFX followed by the
;; conversion, which is what the movprfx attribute records.  The
;; zeroing MOVPRFX is printed with the SVE_HSDI element suffix, i.e. the
;; integer element size, which the insn condition guarantees is at least
;; as large as the floating-point one.
;;
;; The insn only matches when aarch64_sve_pred_dominates_p accepts
;; operand 4 against operand 1.  The rewrite step then replaces operand 4
;; with a copy of operand 1 whenever the two are not already rtx_equal_p.
4961(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_HSDI:mode><SVE_F:mode>"
4962 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
4963 (unspec:SVE_F
4964 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
4965 (unspec:SVE_F
4966 [(match_operand 4)
4967 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4968 (match_operand:SVE_HSDI 2 "register_operand" "w, w, w")]
4969 SVE_COND_ICVTF)
4970 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
4971 UNSPEC_SEL))]
4972 "TARGET_SVE
4973 && <SVE_HSDI:elem_bits> >= <SVE_F:elem_bits>
4974 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4975 "@
4976 <su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
4977 movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
4978 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
4979 "&& !rtx_equal_p (operands[1], operands[4])"
4980 {
4981 operands[4] = copy_rtx (operands[1]);
4982 }
4983 [(set_attr "movprfx" "*,yes,yes")]
4984)
4985
915d28fe
RS
4986;; -------------------------------------------------------------------------
4987;; ---- [FP<-INT] Packs
4988;; -------------------------------------------------------------------------
4989;; No patterns here yet!
4990;; -------------------------------------------------------------------------
6c9c7b73 4991
915d28fe
RS
4992;; -------------------------------------------------------------------------
4993;; ---- [FP<-INT] Unpacks
4994;; -------------------------------------------------------------------------
4995;; The patterns in this section are synthetic.
4996;; -------------------------------------------------------------------------
4997
4998;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
4999;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
5000;; unpacked VNx4SI to VNx2DF.
;;
;; Everything is emitted by the preparation statements, which end in
;; DONE:
;;   1. ZIP2 or ZIP1 (chosen by <hi_lanes_optab>) of operand 1 with
;;      itself, into a fresh VNx4SI temporary;
;;   2. the predicated VNx4SI-to-VNx2DF conversion of that temporary into
;;      operand 0, using the VNx2BI register returned by
;;      aarch64_ptrue_reg and SVE_RELAXED_GP as the strictness value.
5001(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
5002 [(match_operand:VNx2DF 0 "register_operand")
5003 (FLOATUORS:VNx2DF
5004 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
5005 UNPACK_UNSIGNED))]
5006 "TARGET_SVE"
5007 {
5008 /* Use ZIP to do the unpack, since we don't care about the upper halves
5009 and since it has the nice property of not needing any subregs.
5010 If using UUNPK* turns out to be preferable, we could model it as
5011 a ZIP whose first operand is zero. */
5012 rtx temp = gen_reg_rtx (VNx4SImode);
5013 emit_insn ((<hi_lanes_optab>
5014 ? gen_aarch64_sve_zip2vnx4si
5015 : gen_aarch64_sve_zip1vnx4si)
5016 (temp, operands[1], operands[1]));
5017 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
99361551 5018 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
95eb5537 5019 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
99361551 5020 (operands[0], ptrue, temp, strictness));
6c9c7b73
AM
5021 DONE;
5022 }
5023)
5024
915d28fe
RS
5025;; -------------------------------------------------------------------------
5026;; ---- [FP<-FP] Packs
5027;; -------------------------------------------------------------------------
5028;; Includes:
5029;; - FCVT
5030;; -------------------------------------------------------------------------
5031
5032;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
5033;; the results into a single vector.
;;
;; The template expands to three sets:
;;   - operand 1 (<VWIDE>) is converted into operand 4
;;   - operand 2 (<VWIDE>) is converted into operand 5
;;   - operands 4 and 5 are combined with UNSPEC_UZP1 to give operand 0
;; Operands 4 and 5 are fresh pseudos in the SVE_HSF result mode and
;; operand 3 is the <VWIDE_PRED> register returned by aarch64_ptrue_reg;
;; all three are created in the preparation statements.  Both
;; conversions use UNSPEC_COND_FCVT with SVE_RELAXED_GP.
5034(define_expand "vec_pack_trunc_<Vwide>"
5035 [(set (match_dup 4)
5036 (unspec:SVE_HSF
5037 [(match_dup 3)
99361551
RS
5038 (const_int SVE_RELAXED_GP)
5039 (match_operand:<VWIDE> 1 "register_operand")]
5040 UNSPEC_COND_FCVT))
915d28fe
RS
5041 (set (match_dup 5)
5042 (unspec:SVE_HSF
5043 [(match_dup 3)
99361551
RS
5044 (const_int SVE_RELAXED_GP)
5045 (match_operand:<VWIDE> 2 "register_operand")]
5046 UNSPEC_COND_FCVT))
915d28fe
RS
5047 (set (match_operand:SVE_HSF 0 "register_operand")
5048 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
6c9c7b73
AM
5049 "TARGET_SVE"
5050 {
915d28fe
RS
5051 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
5052 operands[4] = gen_reg_rtx (<MODE>mode);
5053 operands[5] = gen_reg_rtx (<MODE>mode);
6c9c7b73
AM
5054 }
5055)
9feeafd7 5056
95eb5537
RS
5057;; Predicated float-to-float truncation.
;;
;; Operand 2 is the SVE_SDF input and operand 0 the SVE_HSF result.  The
;; governing predicate in operand 1 (printed as %1/m) uses the predicate
;; mode of the wider input, <SVE_SDF:VPRED>.  Operand 3 is the GP
;; strictness value and is not printed.  The insn condition keeps only
;; the mode combinations in which the input elements are strictly wider
;; than the result elements.
5058(define_insn "*aarch64_sve_<optab>_trunc<SVE_SDF:mode><SVE_HSF:mode>"
915d28fe
RS
5059 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
5060 (unspec:SVE_HSF
95eb5537 5061 [(match_operand:<SVE_SDF:VPRED> 1 "register_operand" "Upl")
99361551 5062 (match_operand:SI 3 "aarch64_sve_gp_strictness")
95eb5537
RS
5063 (match_operand:SVE_SDF 2 "register_operand" "w")]
5064 SVE_COND_FCVT))]
5065 "TARGET_SVE && <SVE_SDF:elem_bits> > <SVE_HSF:elem_bits>"
5066 "fcvt\t%0.<SVE_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>"
9feeafd7 5067)
a9fad8fe 5068
915d28fe
RS
5069;; -------------------------------------------------------------------------
5070;; ---- [FP<-FP] Unpacks
5071;; -------------------------------------------------------------------------
5072;; Includes:
5073;; - FCVT
5074;; -------------------------------------------------------------------------
5075
5076;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
5077;; First unpack the source without conversion, then float-convert the
5078;; unpacked source.
;;
;; Everything is emitted by the preparation statements, which end in
;; DONE:
;;   1. ZIP2 or ZIP1 (chosen by <hi_lanes_optab>) of operand 1 with
;;      itself, into a fresh temporary of the input mode;
;;   2. the predicated extension of that temporary into operand 0 via
;;      gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>, using the
;;      <VWIDE_PRED> register returned by aarch64_ptrue_reg and
;;      SVE_RELAXED_GP as the strictness value.
5079(define_expand "vec_unpacks_<perm_hilo>_<mode>"
5080 [(match_operand:<VWIDE> 0 "register_operand")
5081 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
5082 UNPACK_UNSIGNED)]
a9fad8fe
AM
5083 "TARGET_SVE"
5084 {
915d28fe
RS
5085 /* Use ZIP to do the unpack, since we don't care about the upper halves
5086 and since it has the nice property of not needing any subregs.
5087 If using UUNPK* turns out to be preferable, we could model it as
5088 a ZIP whose first operand is zero. */
5089 rtx temp = gen_reg_rtx (<MODE>mode);
5090 emit_insn ((<hi_lanes_optab>
5091 ? gen_aarch64_sve_zip2<mode>
5092 : gen_aarch64_sve_zip1<mode>)
5093 (temp, operands[1], operands[1]));
5094 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
99361551 5095 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
95eb5537 5096 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
99361551 5097 (operands[0], ptrue, temp, strictness));
a9fad8fe
AM
5098 DONE;
5099 }
5100)
5101
95eb5537
RS
5102;; Predicated float-to-float extension.
;;
;; Operand 2 is the SVE_HSF input and operand 0 the SVE_SDF result.  The
;; governing predicate in operand 1 (printed as %1/m) uses the predicate
;; mode of the wider result, <SVE_SDF:VPRED>.  Operand 3 is the GP
;; strictness value and is not printed.  The insn condition keeps only
;; the mode combinations in which the result elements are strictly wider
;; than the input elements.
;; NOTE(review): the vec_unpacks_<perm_hilo>_<mode> expander in this file
;; calls gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>, which presumably
;; resolves to this pattern with <optab> being fcvt -- confirm against
;; the iterator definitions.
5103(define_insn "aarch64_sve_<optab>_nontrunc<SVE_HSF:mode><SVE_SDF:mode>"
5104 [(set (match_operand:SVE_SDF 0 "register_operand" "=w")
5105 (unspec:SVE_SDF
5106 [(match_operand:<SVE_SDF:VPRED> 1 "register_operand" "Upl")
99361551
RS
5107 (match_operand:SI 3 "aarch64_sve_gp_strictness")
5108 (match_operand:SVE_HSF 2 "register_operand" "w")]
95eb5537
RS
5109 SVE_COND_FCVT))]
5110 "TARGET_SVE && <SVE_SDF:elem_bits> > <SVE_HSF:elem_bits>"
5111 "fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_HSF:Vetype>"
a9fad8fe
AM
5112)
5113
915d28fe
RS
5114;; -------------------------------------------------------------------------
5115;; ---- [PRED<-PRED] Packs
5116;; -------------------------------------------------------------------------
5117;; Includes:
5118;; - UZP1
5119;; -------------------------------------------------------------------------
a9fad8fe 5120
915d28fe
RS
5121;; Predicate pack. Use UZP1 on the narrower type, which discards
5122;; the high part of each wide element.
;;
;; Operands 1 and 2 are the two <VWIDE> predicate inputs and operand 0 is
;; the PRED_BHS result.  Both inputs are printed with the <Vetype> suffix
;; of the result mode, so the UZP1 is issued on the narrower element
;; size.
5123(define_insn "vec_pack_trunc_<Vwide>"
5124 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
5125 (unspec:PRED_BHS
5126 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
5127 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
5128 UNSPEC_PACK))]
a9fad8fe 5129 "TARGET_SVE"
915d28fe 5130 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
a9fad8fe 5131)
3a0afad0 5132
915d28fe
RS
5133;; -------------------------------------------------------------------------
5134;; ---- [PRED<-PRED] Unpacks
5135;; -------------------------------------------------------------------------
5136;; Includes:
5137;; - PUNPKHI
5138;; - PUNPKLO
5139;; -------------------------------------------------------------------------
5140
5141;; Unpack the low or high half of a predicate, where "high" refers to
5142;; the low-numbered lanes for big-endian and the high-numbered lanes
5143;; for little-endian.
;;
;; The preparation statements do all the work: they choose between the
;; punpkhi and punpklo generators according to <hi_lanes_optab>, emit
;; that one instruction with operands 0 and 1, and finish with DONE.
;; The generator names contain no <su>, so the signed and unsigned
;; variants of this expander emit the same instruction.
5144(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
5145 [(match_operand:<VWIDE> 0 "register_operand")
5146 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
5147 UNPACK)]
3a0afad0
PK
5148 "TARGET_SVE"
5149 {
915d28fe
RS
5150 emit_insn ((<hi_lanes_optab>
5151 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
5152 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
5153 (operands[0], operands[1]));
3a0afad0
PK
5154 DONE;
5155 }
5156)
915d28fe
RS
5157
;; Single-instruction form of the predicate unpack.  Operand 1 is the
;; PRED_BHS input and operand 0 the <VWIDE> result.  Only the
;; UNPACK_UNSIGNED codes are used, so there is no signed variant of this
;; pattern.  The assembly always uses a .h destination and a .b source,
;; whatever the PRED_BHS mode is.
5158(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
5159 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
5160 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
5161 UNPACK_UNSIGNED))]
5162 "TARGET_SVE"
5163 "punpk<perm_hilo>\t%0.h, %1.b"
5164)
This page took 1.175698 seconds and 5 git commands to generate.