;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; Expander for vector moves of all vector modes (including FP16 vectors).
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
;; Expander for misaligned vector moves; must never FAIL.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
;; Duplicate a scalar (SIMD or GP register) across all lanes of an
;; integer vector.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)
;; Duplicate a scalar FP register across all lanes of an FP vector.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
;; Duplicate one lane of a vector across all lanes of the result.
;; Operand 2 is the architectural lane number; it is remapped for
;; big-endian via aarch64_endian_lane_rtx.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
;; As aarch64_dup_lane, but the source vector has the opposite width
;; (64-bit vs 128-bit) from the destination.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
;; Move for 64-bit vector modes: load, store (incl. zero via xzr),
;; SIMD-to-SIMD, SIMD<->GP transfers, and immediate materialization.
(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
;; Move for 128-bit vector modes.  GP-register alternatives (4-6) are
;; emitted as "#" and split later into two DImode moves (length 8).
(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, Umq, m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
;; Load a pair of adjacent 64-bit vectors with a single LDP.  The second
;; address must equal the first plus the mode size.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
	(match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)
;; Store a pair of adjacent 64-bit vectors with a single STP.  The second
;; address must equal the first plus the mode size.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
	(match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
;; Split a 128-bit vector move between two general-purpose registers
;; into a pair of DImode moves after reload.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
	(match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})
;; Split a 128-bit vector move between an FP register and a GP register
;; (either direction) after reload.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
	(match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
;; Split a 128-bit vector move involving a GP register into two
;; 64-bit half moves.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
	(match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }
    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
;; Move the low 64 bits of a 128-bit vector into a GP register.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
	(vec_select:<VHALF>
	  (match_operand:VQ 1 "register_operand" "w")
	  (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
;; Move the high 64 bits of a 128-bit vector into a GP register.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
	(vec_select:<VHALF>
	  (match_operand:VQ 1 "register_operand" "w")
	  (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
;; Vector OR-NOT: operand 0 = operand 2 | ~operand 1 (note the ORN
;; instruction negates its last source, hence the swapped print order).
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector bit-clear: operand 0 = operand 2 & ~operand 1.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)
;; Vector integer subtraction.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)
;; Vector integer multiplication (no V2DI: MUL has no 64-bit element form).
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
;; Byte-swap each element using the appropriate REV variant.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)
;; Reverse the bits within each byte element.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
;; Count trailing zeros: byte-swap, bit-reverse within bytes, then count
;; leading zeros of the result.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
;; xorsign: result = op1 with its sign replaced by op1's sign XOR op2's sign,
;; implemented as op1 ^ (op2 & sign-bit-mask) in the integer domain.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{

  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)
;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return result;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
;; As aarch64_<sur>dot_lane, but the index vector is 128-bit (V16QI).
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
;; copysign: select the sign bit from op2 and the rest from op1 via BSL
;; with a sign-bit mask.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
;; Multiply a vector by one lane of another vector of the same mode.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
;; As above, but the lane comes from a vector of the opposite width.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
;; Multiply a vector by a duplicated scalar, using the indexed-MUL form
;; with lane 0.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
;; Reciprocal square-root estimate.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (Newton-Raphson refinement).
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand reciprocal square root via the approximation sequence.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
;; Scalar DF multiply by one lane of a V2DF vector.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
;; Vector integer negation.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)
;; Vector integer absolute value.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)
;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD, hence the unspec.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
;; Signed absolute difference.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
;; Signed absolute difference and accumulate.
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
;; Floating-point absolute difference.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector exclusive-or.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector bitwise complement.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Insert a scalar (from GP reg, SIMD lane 0, or memory) into one lane
;; of an integer vector.  Operand 2 is a one-hot lane mask.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
	(vec_merge:VDQ_BHSI
	    (vec_duplicate:VDQ_BHSI
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)
;; Copy one lane of one vector into a lane of another vector of the
;; same mode (INS element form).
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
;; As above, but the source lane comes from a vector of the opposite width.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
;; Logical shift right by an immediate replicated across lanes.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
;; Arithmetic shift right by an immediate replicated across lanes.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
;; Shift left by an immediate replicated across lanes.
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
;; Shift left by a per-lane register amount (SSHL).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
;; Unsigned variable shift (USHL); negative amounts shift right.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
;; Signed variable shift (SSHL); negative amounts shift right.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
;; Vector shift left by a scalar amount: use the immediate form when the
;; amount is a valid constant, otherwise duplicate it and use SSHL.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
;; Vector logical shift right by a scalar amount: immediate form when
;; possible, otherwise negate the amount and use USHL.
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Vector arithmetic shift right by a scalar amount: immediate form when
;; possible, otherwise negate the amount and use SSHL.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Vector-by-vector shift left maps directly to SSHL.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
;; DI mode arithmetic shift right for the SIMD intrinsic; allows a
;; shift amount of 64, which the standard pattern does not.
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
;; Vector-by-vector logical shift right: negate the amount and use USHL.
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})
;; DI mode logical shift right for the SIMD intrinsic; a shift by 64
;; yields zero.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
;; vec_set optab for integer vectors: convert the lane index into a
;; one-hot mask for the insn pattern.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
;; Insert a DI scalar (from a GP or SIMD register) into one lane of a
;; V2DI vector.
(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
	(vec_merge:V2DI
	    (vec_duplicate:V2DI
		(match_operand:DI 1 "register_operand" "r,w"))
	    (match_operand:V2DI 3 "register_operand" "0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
	return "ins\\t%0.d[%p2], %1";
      case 1:
	return "ins\\t%0.d[%p2], %1.d[0]";
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)
;; vec_set optab for V2DI: convert the lane index into a one-hot mask.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
					     GEN_INT (elem), operands[0]));
    DONE;
  }
)
;; Insert a scalar FP register into one lane of an FP vector.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_merge:VDQF_F16
	    (vec_duplicate:VDQF_F16
		(match_operand:<VEL> 1 "register_operand" "w"))
	    (match_operand:VDQF_F16 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
1116 (define_expand "vec_set<mode>"
1117 [(match_operand:VDQF_F16
0 "register_operand" "+w")
1118 (match_operand:<VEL>
1 "register_operand" "w")
1119 (match_operand:SI
2 "immediate_operand" "")]
1122 HOST_WIDE_INT elem = (HOST_WIDE_INT)
1 << INTVAL (operands[
2]);
1123 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[
0], operands[
1],
1124 GEN_INT (elem), operands[
0]));
;; Integer multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; MLA is destructive, so the accumulator (operand 1) is tied to operand 0.
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLA with one multiplicand broadcast from a lane of a same-width vector.
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane source vector has the opposite (64/128-bit) width.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLA where the duplicated scalar is not a lane extract; use lane 0 of the
;; scalar's register directly.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
		(match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Integer multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLS with one multiplicand broadcast from a lane of a same-width vector.
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane source vector has the opposite (64/128-bit) width.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLS where the duplicated scalar is not a lane extract; use lane 0 of the
;; scalar's register directly.
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
		(match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; There is no single V2DI smax/smin instruction; synthesize it through a
;; compare followed by vcond (a compare-and-select).
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
		    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Two-element variant: the zero half is a single (const_int 0), not a
;; vec_duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (const_int 0)
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Dispatch to the endian-appropriate internal pattern.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

;; Dispatch to the endian-appropriate move_hi_quad pattern, supplying the
;; parallel that selects the low-numbered half of operand 0.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
		    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
		    operands[1], p));
  DONE;
})
;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; Pack two 64-bit vectors into one double-width register, then narrow.
;; Which input goes in the low half depends on endianness.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.  Narrow both inputs with xtn/xtn2; on big-endian the
;; operands swap roles so the architectural low half gets operand 2.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
;; Widening operations.

;; Extend the low half of a quad vector to a full wide vector.  A shift
;; left by 0 ([us]shll #0) performs the extension.
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Likewise for the high half, via [us]shll2.
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
;; Widening arithmetic.

;; Widening multiply-accumulate of the low halves of two quad vectors.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Likewise for the high halves, via [us]mlal2.
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of the low halves.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Likewise for the high halves, via [us]mlsl2.
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-accumulate of whole 64-bit vectors.
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of whole 64-bit vectors.
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply of the low halves of two quad vectors ([us]mull).
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)

;; Widening multiply of the high halves ([us]mull2).
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.
;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations are safe because of reasons explained below.
;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.

;; FP arithmetic operations.

(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Try an approximate reciprocal-based division sequence first; fall
;; through to the fdiv insn below if that is not profitable/enabled.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
;; Fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; FMLA is destructive, so the addend (operand 3) is tied to operand 0.
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLA with one multiplicand broadcast from a lane of a same-width vector.
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 3 "register_operand" "w")
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane source vector has the opposite (64/128-bit) width.
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA where the duplicated scalar is not a lane extract; use lane 0 of
;; the scalar's register directly.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fma taking one input from a lane of a V2DF register.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
;; Fused multiply-subtract: operand 0 = -(operand 1) * operand 2 + operand 3.
;; FMLS is destructive, so the addend (operand 3) is tied to operand 0.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLS with one multiplicand broadcast from a lane of a same-width vector.
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
	(match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane source vector has the opposite (64/128-bit) width.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
	(match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS where the duplicated scalar is not a lane extract; use lane 0 of
;; the scalar's register directly.
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
	(match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fnma taking one input from a lane of a V2DF register.
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
      (vec_select:DF
	(match_operand:V2DF 1 "register_operand" "w")
	(parallel [(match_operand:SI 2 "immediate_operand")]))
      (neg:DF
	(match_operand:DF 3 "register_operand" "w"))
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
				FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)

;; HF Scalar variants of related SIMD instructions.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)

;; Combine a truncating convert with a multiply by a power of two into a
;; single fixed-point convert (fcvtz[su] with an fbits immediate).
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)

(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.

;; Float widening operations.

(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Convert between fixed-point and floating-point (vector modes)

(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns their behavior is as required.

(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    /* BUG FIX: this expand previously emitted the *_lo_* pattern, which
       would widen the low architectural half despite selecting the
       hi-half parallel; it must emit the hi-half (fcvtl2) pattern.  */
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
2112 (define_insn "aarch64_float_extend_lo_<Vwide>"
2113 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
2114 (float_extend:<VWIDE>
2115 (match_operand:VDF
1 "register_operand" "w")))]
2117 "fcvtl
\\t%
0<Vmwtype>, %
1<Vmtype>"
2118 [(set_attr "type" "neon_fp_cvt_widen_s")]
2121 ;; Float narrowing operations.
;; fcvtn: narrow a double-width FP vector into a 64-bit result.
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
        (float_truncate:VDF
          (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
;; fcvtn2, little-endian: narrowed result lands in the high half, operand 1
;; (tied to the destination) already holds the low half.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_operand:VDF 1 "register_operand" "0")
          (float_truncate:VDF
            (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
;; fcvtn2, big-endian: same instruction, but the vec_concat order is
;; reversed to match big-endian lane numbering.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (float_truncate:VDF
            (match_operand:<VWIDE> 2 "register_operand" "w"))
          (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
;; Dispatch to the endian-specific fcvtn2 pattern.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
                             ? gen_aarch64_float_truncate_hi_<Vdbl>_be
                             : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
;; Pack two V2DF vectors into one V4SF: fcvtn into the low half, then
;; fcvtn2 into the high half.  lo/hi operand choice is endian-dependent.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
        (float_truncate:V2SF
            (match_operand:V2DF 1 "register_operand"))
        (float_truncate:V2SF
            (match_operand:V2DF 2 "register_operand"))
          ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
                                                   tmp, operands[hi]));
    DONE;
  }
)
;; Pack two DF scalars into one V2SF: assemble a V2DF temporary from the
;; two scalars, then narrow it with fcvtn.
;; BUG FIX: the temporary must be allocated in V2DFmode, not V2SFmode —
;; it is written by gen_move_lo_quad_v2df / gen_move_hi_quad_v2df and read
;; as the wide (V2DF) input of gen_aarch64_float_truncate_lo_v2sf; a V2SF
;; pseudo here is the wrong mode for all three uses.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
        (float_truncate:SF
            (match_operand:DF 1 "register_operand"))
        (float_truncate:SF
            (match_operand:DF 2 "register_operand"))
          ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2DFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2211 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2213 ;; a = (b < c) ? b : c;
2214 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2215 ;; either explicitly or indirectly via -ffast-math.
2217 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2218 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2219 ;; operand will be returned when both operands are zero (i.e. they may not
2220 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2221 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names for FP map to fmaxnm/fminnm (NaN-propagating
;; semantics are acceptable here; see the comment above).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2233 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2234 ;; fmaxnm and fminnm are used for the fmax<mode>
3 standard pattern names,
2235 ;; which implement the IEEE fmax ()/fmin () functions.
;; IEEE fmax()/fmin() style vector min/max (unspec keeps exact semantics).
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                       FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2246 ;; 'across lanes' add.
;; Across-lanes integer add reduction to a scalar: addv into lane 0, then
;; extract that lane (endian-corrected).
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
                 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
;; Pairwise FP add across two source vectors.
(define_insn "aarch64_faddp<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                       UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
;; addv/addp: integer add reduction, result in element 0.
(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
                     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
;; V2SI has no addv form; a single pairwise addp reduces both lanes.
(define_insn "aarch64_reduc_plus_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
                     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)
;; Two-element FP vectors reduce directly with a scalar-producing faddp.
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
                      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
;; V4SF add reduction: two rounds of pairwise faddp, then extract lane 0.
(define_expand "reduc_plus_scal_v4sf"
  [(set (match_operand:SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
                     UNSPEC_FADDV))]
  "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
;; Count leading sign bits (cls).
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
;; Count leading zeros per element.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
;; Population count per byte element (cnt).
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2337 ;; 'across lanes' max and min ops.
2339 ;; Template for outputting a scalar, so we can create __builtins which can be
2340 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP min/max reduction to scalar: reduce into a scratch, extract lane 0.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
                 FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
                                                              operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2356 ;; Likewise for integer cases, signed and unsigned.
;; Integer min/max reduction to scalar, signed and unsigned.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
                    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
                                                              operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
;; smaxv/sminv/umaxv/uminv: integer min/max reduction, result in element 0.
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
                       MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
;; V2SI has no across-lanes form; one pairwise op reduces both lanes.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
                     MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)
;; FP min/max reduction (fmaxv/fminv/fmaxnmv/... or pairwise form).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
2399 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2401 ;; Operand
1 is the mask, operands
2 and
3 are the bitfields from which
2404 ;; Thus our BSL is of the form:
2405 ;; op0 = bsl (mask, op2, op3)
2406 ;; We can use any of:
2409 ;; bsl mask, op1, op2
2410 ;; if (op0 = op1) (so
1-bits in mask choose bits from op2, else op0)
2411 ;; bit op0, op2, mask
2412 ;; if (op0 = op2) (so
0-bits in mask choose bits from op1, else op0)
2413 ;; bif op0, op1, mask
2415 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2416 ;; Some forms of straight-line code may generate the equivalent form
2417 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical xor/and/xor form of bit-select; the three alternatives tie a
;; different operand to the destination so bsl/bit/bif can each be used.
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
        (xor:VDQ_I
           (and:VDQ_I
             (xor:VDQ_I
               (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
               (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
             (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
          (match_dup:<V_INT_EQUIV> 3)
        ))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2437 ;; We need this form in addition to the above pattern to match the case
2438 ;; when combine tries merging three insns such that the second operand of
2439 ;; the outer XOR matches the second operand of the inner XOR rather than
2440 ;; the first. The two are equivalent but since recog doesn't try all
2441 ;; permutations of commutative operations, we have to have a separate pattern.
;; Commuted variant of bsl_internal: the inner XOR's operands (and the
;; outer match_dup) are swapped, as combine may present them this way.
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
        (xor:VDQ_I
           (and:VDQ_I
             (xor:VDQ_I
               (match_operand:VDQ_I 3 "register_operand" "w,w,0")
               (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
             (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
          (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2460 ;; DImode is special, we want to avoid computing operations which are
2461 ;; more naturally computed in general purpose registers in the vector
2462 ;; registers. If we do that, we need to move all three operands from general
2463 ;; purpose registers to vector registers, then back again. However, we
2464 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2465 ;; optimizations based on the component operations of a BSL.
2467 ;; That means we need a splitter back to the individual operations, if they
2468 ;; would be better calculated on the integer side.
2470 (define_insn_and_split "aarch64_simd_bsldi_internal"
2471 [(set (match_operand:DI
0 "register_operand" "=w,w,w,&r")
2475 (match_operand:DI
3 "register_operand" "w,
0,w,r")
2476 (match_operand:DI
2 "register_operand" "w,w,
0,r"))
2477 (match_operand:DI
1 "register_operand" "
0,w,w,r"))
2482 bsl
\\t%
0.8b, %
2.8b, %
3.8b
2483 bit
\\t%
0.8b, %
2.8b, %
1.8b
2484 bif
\\t%
0.8b, %
3.8b, %
1.8b
2486 "&& REG_P (operands[
0]) && GP_REGNUM_P (REGNO (operands[
0]))"
2487 [(match_dup
1) (match_dup
1) (match_dup
2) (match_dup
3)]
2489 /* Split back to individual operations. If we're before reload, and
2490 able to create a temporary register, do so. If we're after reload,
2491 we've got an early-clobber destination register, so use that.
2492 Otherwise, we can't create pseudos and we can't yet guarantee that
2493 operands[
0] is safe to write, so FAIL to split. */
2496 if (reload_completed)
2497 scratch = operands[
0];
2498 else if (can_create_pseudo_p ())
2499 scratch = gen_reg_rtx (DImode);
2503 emit_insn (gen_xordi3 (scratch, operands[
2], operands[
3]));
2504 emit_insn (gen_anddi3 (scratch, scratch, operands[
1]));
2505 emit_insn (gen_xordi3 (operands[
0], scratch, operands[
3]));
2508 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2509 (set_attr "length" "
4,
4,
4,
12")]
2512 (define_insn_and_split "aarch64_simd_bsldi_alt"
2513 [(set (match_operand:DI
0 "register_operand" "=w,w,w,&r")
2517 (match_operand:DI
3 "register_operand" "w,w,
0,r")
2518 (match_operand:DI
2 "register_operand" "w,
0,w,r"))
2519 (match_operand:DI
1 "register_operand" "
0,w,w,r"))
2524 bsl
\\t%
0.8b, %
3.8b, %
2.8b
2525 bit
\\t%
0.8b, %
3.8b, %
1.8b
2526 bif
\\t%
0.8b, %
2.8b, %
1.8b
2528 "&& REG_P (operands[
0]) && GP_REGNUM_P (REGNO (operands[
0]))"
2529 [(match_dup
0) (match_dup
1) (match_dup
2) (match_dup
3)]
2531 /* Split back to individual operations. If we're before reload, and
2532 able to create a temporary register, do so. If we're after reload,
2533 we've got an early-clobber destination register, so use that.
2534 Otherwise, we can't create pseudos and we can't yet guarantee that
2535 operands[
0] is safe to write, so FAIL to split. */
2538 if (reload_completed)
2539 scratch = operands[
0];
2540 else if (can_create_pseudo_p ())
2541 scratch = gen_reg_rtx (DImode);
2545 emit_insn (gen_xordi3 (scratch, operands[
2], operands[
3]));
2546 emit_insn (gen_anddi3 (scratch, scratch, operands[
1]));
2547 emit_insn (gen_xordi3 (operands[
0], scratch, operands[
2]));
2550 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2551 (set_attr "length" "
4,
4,
4,
12")]
;; Bit-select expander: do the work in the integer-equivalent mode (the
;; internal pattern is integer-only) and move back for FP modes.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
  "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
                                                         operands[1],
                                                         operands[2],
                                                         operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
;; Select between operands 1 and 2 under mask operand 3.  The all-ones /
;; all-zeros constant cases degenerate to a move or a NOT of the mask.
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
           && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      if (!REG_P (operands[1]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
        operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
                                             operands[1], operands[2]));
    }

  DONE;
})
2609 ;; Patterns comparing two vectors to produce a mask.
2611 (define_expand "vec_cmp<mode><mode>"
2612 [(set (match_operand:VSDQ_I_DI
0 "register_operand")
2613 (match_operator
1 "comparison_operator"
2614 [(match_operand:VSDQ_I_DI
2 "register_operand")
2615 (match_operand:VSDQ_I_DI
3 "nonmemory_operand")]))]
2618 rtx mask = operands[
0];
2619 enum rtx_code code = GET_CODE (operands[
1]);
2629 if (operands[
3] == CONST0_RTX (<MODE>mode))
2634 if (!REG_P (operands[
3]))
2635 operands[
3] = force_reg (<MODE>mode, operands[
3]);
2643 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[
2], operands[
3]));
2647 emit_insn (gen_aarch64_cmge<mode> (mask, operands[
2], operands[
3]));
2651 emit_insn (gen_aarch64_cmle<mode> (mask, operands[
2], operands[
3]));
2655 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[
2], operands[
3]));
2659 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[
3], operands[
2]));
2663 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[
2], operands[
3]));
2667 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[
3], operands[
2]));
2671 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[
2], operands[
3]));
2675 /* Handle NE as !EQ. */
2676 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[
2], operands[
3]));
2677 emit_insn (gen_one_cmpl<v_int_equiv>
2 (mask, mask));
2681 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[
2], operands[
3]));
2691 (define_expand "vec_cmp<mode><v_int_equiv>"
2692 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand")
2693 (match_operator
1 "comparison_operator"
2694 [(match_operand:VDQF
2 "register_operand")
2695 (match_operand:VDQF
3 "nonmemory_operand")]))]
2698 int use_zero_form =
0;
2699 enum rtx_code code = GET_CODE (operands[
1]);
2700 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2702 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2711 if (operands[
3] == CONST0_RTX (<MODE>mode))
2718 if (!REG_P (operands[
3]))
2719 operands[
3] = force_reg (<MODE>mode, operands[
3]);
2729 comparison = gen_aarch64_cmlt<mode>;
2734 std::swap (operands[
2], operands[
3]);
2738 comparison = gen_aarch64_cmgt<mode>;
2743 comparison = gen_aarch64_cmle<mode>;
2748 std::swap (operands[
2], operands[
3]);
2752 comparison = gen_aarch64_cmge<mode>;
2756 comparison = gen_aarch64_cmeq<mode>;
2774 /* All of the above must not raise any FP exceptions. Thus we first
2775 check each operand for NaNs and force any elements containing NaN to
2776 zero before using them in the compare.
2777 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2778 (cm<cc> (isnan (a) ?
0.0 : a,
2779 isnan (b) ?
0.0 : b))
2780 We use the following transformations for doing the comparisons:
2784 a UNLT b -> b GT a. */
2786 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2787 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2788 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2789 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[
2], operands[
2]));
2790 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[
3], operands[
3]));
2791 emit_insn (gen_and<v_int_equiv>
3 (tmp2, tmp0, tmp1));
2792 emit_insn (gen_and<v_int_equiv>
3 (tmp0, tmp0,
2793 lowpart_subreg (<V_INT_EQUIV>mode,
2796 emit_insn (gen_and<v_int_equiv>
3 (tmp1, tmp1,
2797 lowpart_subreg (<V_INT_EQUIV>mode,
2800 gcc_assert (comparison != NULL);
2801 emit_insn (comparison (operands[
0],
2802 lowpart_subreg (<MODE>mode,
2803 tmp0, <V_INT_EQUIV>mode),
2804 lowpart_subreg (<MODE>mode,
2805 tmp1, <V_INT_EQUIV>mode)));
2806 emit_insn (gen_orn<v_int_equiv>
3 (operands[
0], tmp2, operands[
0]));
2816 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2817 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2823 a NE b -> ~(a EQ b) */
2824 gcc_assert (comparison != NULL);
2825 emit_insn (comparison (operands[
0], operands[
2], operands[
3]));
2827 emit_insn (gen_one_cmpl<v_int_equiv>
2 (operands[
0], operands[
0]));
2831 /* LTGT is not guaranteed to not generate a FP exception.  So let's
2832 go the faster way : ((a > b) || (b > a)). */
2833 emit_insn (gen_aarch64_cmgt<mode> (operands[
0],
2834 operands[
2], operands[
3]));
2835 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[
3], operands[
2]));
2836 emit_insn (gen_ior<v_int_equiv>
3 (operands[
0], operands[
0], tmp));
2842 /* cmeq (a, a) & cmeq (b, b). */
2843 emit_insn (gen_aarch64_cmeq<mode> (operands[
0],
2844 operands[
2], operands[
2]));
2845 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[
3], operands[
3]));
2846 emit_insn (gen_and<v_int_equiv>
3 (operands[
0], operands[
0], tmp));
2848 if (code == UNORDERED)
2849 emit_insn (gen_one_cmpl<v_int_equiv>
2 (operands[
0], operands[
0]));
2850 else if (code == UNEQ)
2852 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[
2], operands[
3]));
2853 emit_insn (gen_orn<v_int_equiv>
3 (operands[
0], operands[
0], tmp));
;; Unsigned vector compare: the signed expander already dispatches on the
;; (unsigned) comparison code, so just forward to it.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
        (match_operator 1 "comparison_operator"
          [(match_operand:VSDQ_I_DI 2 "register_operand")
           (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
                                      operands[2], operands[3]));
  DONE;
})
;; vcond: compare operands 4/5, then select operands 1/2 under the mask.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
        (if_then_else:VALLDI
          (match_operator 3 "comparison_operator"
            [(match_operand:VALLDI 4 "register_operand")
             (match_operand:VALLDI 5 "nonmemory_operand")])
          (match_operand:VALLDI 1 "nonmemory_operand")
          (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
                                    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
                                             operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
                                                 operands[2], mask));

  DONE;
})
2906 (define_expand "vcond<v_cmp_mixed><mode>"
2907 [(set (match_operand:<V_cmp_mixed>
0 "register_operand")
2908 (if_then_else:<V_cmp_mixed>
2909 (match_operator
3 "comparison_operator"
2910 [(match_operand:VDQF_COND
4 "register_operand")
2911 (match_operand:VDQF_COND
5 "nonmemory_operand")])
2912 (match_operand:<V_cmp_mixed>
1 "nonmemory_operand")
2913 (match_operand:<V_cmp_mixed>
2 "nonmemory_operand")))]
2916 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2917 enum rtx_code code = GET_CODE (operands[
3]);
2919 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2920 it as well as switch operands
1/
2 in order to avoid the additional
2924 operands[
3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[
3]),
2925 operands[
4], operands[
5]);
2926 std::swap (operands[
1], operands[
2]);
2928 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[
3],
2929 operands[
4], operands[
5]));
2930 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2931 operands[
0], operands[
1],
2932 operands[
2], mask));
;; Unsigned vcond: same shape as vcond, using the integer compare expander.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
        (if_then_else:VSDQ_I_DI
          (match_operator 3 "comparison_operator"
            [(match_operand:VSDQ_I_DI 4 "register_operand")
             (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
          (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
          (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
                                    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
                                      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
                                                 operands[2], mask));

  DONE;
})
2966 (define_expand "vcondu<mode><v_cmp_mixed>"
2967 [(set (match_operand:VDQF
0 "register_operand")
2969 (match_operator
3 "comparison_operator"
2970 [(match_operand:<V_cmp_mixed>
4 "register_operand")
2971 (match_operand:<V_cmp_mixed>
5 "nonmemory_operand")])
2972 (match_operand:VDQF
1 "nonmemory_operand")
2973 (match_operand:VDQF
2 "nonmemory_operand")))]
2976 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2977 enum rtx_code code = GET_CODE (operands[
3]);
2979 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2980 it as well as switch operands
1/
2 in order to avoid the additional
2984 operands[
3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[
3]),
2985 operands[
4], operands[
5]);
2986 std::swap (operands[
1], operands[
2]);
2988 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2990 operands[
4], operands[
5]));
2991 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[
0], operands[
1],
2992 operands[
2], mask));
2996 ;; Patterns for AArch64 SIMD Intrinsics.
2998 ;; Lane extraction with sign extension to general purpose register.
2999 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3000 [(set (match_operand:GPI
0 "register_operand" "=r")
3003 (match_operand:VDQQH
1 "register_operand" "w")
3004 (parallel [(match_operand:SI
2 "immediate_operand" "i")]))))]
3007 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
3008 return "smov
\\t%<GPI:w>
0, %
1.<VDQQH:Vetype>[%
2]";
3010 [(set_attr "type" "neon_to_gp<q>")]
3013 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3014 [(set (match_operand:SI
0 "register_operand" "=r")
3017 (match_operand:VDQQH
1 "register_operand" "w")
3018 (parallel [(match_operand:SI
2 "immediate_operand" "i")]))))]
3021 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
3022 return "umov
\\t%w0, %
1.<Vetype>[%
2]";
3024 [(set_attr "type" "neon_to_gp<q>")]
3027 ;; Lane extraction of a value, neither sign nor zero extension
3028 ;; is guaranteed so upper bits should be considered undefined.
3029 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Extract one lane: to a GP register (umov), another vector register
;; (dup), or memory (st1).  Lane index is endian-corrected for assembly.
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:VALL_F16 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
        case 0:
          return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
        case 1:
          return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
        case 2:
          return "st1\\t{%1.<Vetype>}[%2], %0";
        default:
          gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
;; Load two adjacent 64-bit values as one Q-register load; the condition
;; checks the second address really is the first plus one element.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
           (match_operand:VDC 1 "memory_operand" "Utq")
           (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)
;; Store a concatenation of two 64-bit values with a single stp, from
;; either vector (d) or general (x) registers.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
        (vec_concat:<VDBL>
           (match_operand:VDC 1 "register_operand" "w, r")
           (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)
3079 ;; In this insn, operand
1 should be low, and operand
2 the high part of the
;; Combine with a zero high part (little-endian): plain 64-bit moves/loads
;; implicitly clear the upper half, so no explicit zeroing is needed.
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
           (match_operand:VDC 1 "general_operand" "w,?r,m")
           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
;; Big-endian twin of *aarch64_combinez: vec_concat operand order reversed.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
;; Combine two 64-bit vectors into one 128-bit vector (endian-aware helper).
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
  DONE;
}
)
;; Combine via explicit low/high quad moves.
(define_expand "aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
  emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
  DONE;
}
[(set_attr "type" "multiple")]
)
3137 ;; <su><addsub>l<q>.
;; Widening add/sub of the high halves: saddl2/uaddl2/ssubl2/usubl2.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
;; Widening add/sub of the low halves: saddl/uaddl/ssubl/usubl.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
;; saddl2 intrinsic expander: build the hi-half selector and emit.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
;; uaddl2 intrinsic expander: build the hi-half selector and emit.
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
;; ssubl2 intrinsic expander: build the hi-half selector and emit.
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
;; usubl2 intrinsic expander: build the hi-half selector and emit.
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})
;; Widening add/sub of whole 64-bit vectors: saddl/uaddl/ssubl/usubl.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
                           (match_operand:VD_BHSI 1 "register_operand" "w"))
                        (ANY_EXTEND:<VWIDE>
                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
;; <su><addsub>w<q>.

;; Widening sum of a 128-bit vector into a wide accumulator: add the lo
;; half via <su>addw (_internal), then the hi half via <su>addw2.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; 64-bit input: a single saddw suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Unsigned counterpart of the 128-bit widening sum above.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Unsigned counterpart of the 64-bit widening sum above.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
;; <su><addsub>w: wide accumulator op widened 64-bit vector.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; As above, operating on the low half of a 128-bit vector.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; As above, operating on the high half (the "2" forms).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)
;; Expanders for the *w2 variants: build the hi-half selector and emit the
;; corresponding _internal insn.

(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
;; <su><r>h<addsub>: halving (optionally rounding) add/sub.

(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
		         HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)
;; <r><addsub>hn<q>: add/sub, take the high half of each element, narrow.

(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
                           ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; "2" form: narrow into the high half of the destination, keeping the
;; low half (tied operand 1) intact.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
                            ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
;; pmul: polynomial (carry-less) multiply on byte vectors.

(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
 "TARGET_SIMD"
 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
;; fmulx: FP multiply-extended (0 * inf == 2.0 instead of NaN).

(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)
;; vmulxq_lane_f32, and vmulx_laneq_f32: fmulx by a broadcast lane taken
;; from the other-width vector mode.

(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32: same-width lane form.

(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)
;; fmulx by a scalar broadcast to every lane; always lane 0 of operand 2.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar fmulx with one operand pulled out of a vector lane.

(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>: saturating add/sub.

(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
;; suqadd and usqadd: saturating accumulate of opposite-signedness value;
;; operand 1 is the tied accumulator.

(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)
;; sqmovun: saturating narrow, signed source to unsigned result.

(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                           UNSPEC_SQXTUN))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn: saturating narrow, same signedness.

(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                           SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
;; Saturating unary ops: sqabs / sqneg.
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
;; sq<r>dmulh: saturating doubling multiply, return high half
;; (optionally rounding).

(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)
;; sq<r>dmulh_lane: multiply by a single selected lane of operand 2.
;; Lane numbers are adjusted for big-endian via aarch64_endian_lane_rtx.

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar forms.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
;; sqrdml[as]h: rounding saturating doubling multiply-accumulate/subtract
;; high half (ARMv8.1-A); operand 1 is the tied accumulator.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
;; sqrdml[as]h_lane.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar lane form.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar laneq form.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
;; vqdml[sa]l: saturating doubling multiply-add/sub long.  Modelled as
;; accumulate of (sign-extended a * sign-extended b) << 1 with saturation.

(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
;; vqdml[sa]l_lane: as above, with the second multiplicand broadcast from a
;; selected lane.

(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar variants: no vec_duplicate, the lane value is used directly.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	    )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	    )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
;; vqdml[sa]l_n: second multiplicand is a scalar broadcast to all lanes
;; (always lane 0 in the printed template).

(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
;; sqdml[as]l2: operate on the high halves of both 128-bit inputs.

(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 2 "register_operand" "w")
		     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 3 "register_operand" "w")
		     (match_dup 4))))
	     (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3],
						  p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3],
						  p));
  DONE;
})
;; vqdml[sa]l2_lane: high-half multiply-accumulate by a broadcast lane.

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	   (sign_extend:<VWIDE>
	    (vec_select:<VHALF>
	     (match_operand:VQ_HSI 2 "register_operand" "w")
	     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	   (sign_extend:<VWIDE>
	    (vec_duplicate:<VHALF>
	     (vec_select:<VEL>
	      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
	     ))))
	  (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	   (sign_extend:<VWIDE>
	    (vec_select:<VHALF>
	     (match_operand:VQ_HSI 2 "register_operand" "w")
	     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	   (sign_extend:<VWIDE>
	    (vec_duplicate:<VHALF>
	     (vec_select:<VEL>
	      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
	     ))))
	  (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
;; Expanders for sqdml[as]l2_lane / _laneq: build the hi-half selector and
;; forward everything to the matching _internal insn.

(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							operands[4], p));
  DONE;
})
;; sqdml[as]l2_n: high halves times a scalar broadcast.

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	   (sign_extend:<VWIDE>
	    (vec_select:<VHALF>
	     (match_operand:VQ_HSI 2 "register_operand" "w")
	     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	   (sign_extend:<VWIDE>
	    (vec_duplicate:<VHALF>
	     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	  (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
;; vqdmull: saturating doubling multiply long, modelled as
;; ss_ashift of the widened product by 1.

(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 2 "register_operand" "w")))
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
;; vqdmull_lane: second multiplicand broadcast from a selected lane.

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar lane forms: the selected lane is used directly.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
;; vqdmull_n: second multiplicand is a scalar broadcast (printed as lane 0).

(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
;; vqdmull2: multiply the high halves of both 128-bit inputs.

(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
;; vqdmull2_lane: high half of operand 1 times a broadcast lane.

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							p));
  DONE;
})
;; vqdmull2_n: high half times a scalar broadcast.

(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
;; vshl: shift by (signed, per-element) register amount.

(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_shift_reg<q>")]
)
4337 (define_insn "aarch64_<sur>q<r>shl<mode>"
4338 [(set (match_operand:VSDQ_I
0 "register_operand" "=w")
4340 [(match_operand:VSDQ_I
1 "register_operand" "w")
4341 (match_operand:VSDQ_I
2 "register_operand" "w")]
4344 "<sur>q<r>shl
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>";
4345 [(set_attr "type" "neon_sat_shift_reg<q>")]
4350 (define_insn "aarch64_<sur>shll_n<mode>"
4351 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4352 (unspec:<VWIDE> [(match_operand:VD_BHSI
1 "register_operand" "w")
4354 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4358 if (INTVAL (operands[
2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4359 return "shll
\\t%
0.<Vwtype>, %
1.<Vtype>, %
2";
4361 return "<sur>shll
\\t%
0.<Vwtype>, %
1.<Vtype>, %
2";
4363 [(set_attr "type" "neon_shift_imm_long")]
4368 (define_insn "aarch64_<sur>shll2_n<mode>"
4369 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4370 (unspec:<VWIDE> [(match_operand:VQW
1 "register_operand" "w")
4371 (match_operand:SI
2 "immediate_operand" "i")]
4375 if (INTVAL (operands[
2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4376 return "shll2
\\t%
0.<Vwtype>, %
1.<Vtype>, %
2";
4378 return "<sur>shll2
\\t%
0.<Vwtype>, %
1.<Vtype>, %
2";
4380 [(set_attr "type" "neon_shift_imm_long")]
4385 (define_insn "aarch64_<sur>shr_n<mode>"
4386 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
4387 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI
1 "register_operand" "w")
4389 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4392 "<sur>shr
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %
2"
4393 [(set_attr "type" "neon_sat_shift_imm<q>")]
4398 (define_insn "aarch64_<sur>sra_n<mode>"
4399 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
4400 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI
1 "register_operand" "
0")
4401 (match_operand:VSDQ_I_DI
2 "register_operand" "w")
4403 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4406 "<sur>sra
\\t%<v>
0<Vmtype>, %<v>
2<Vmtype>, %
3"
4407 [(set_attr "type" "neon_shift_acc<q>")]
4412 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4413 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
4414 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI
1 "register_operand" "
0")
4415 (match_operand:VSDQ_I_DI
2 "register_operand" "w")
4417 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4420 "s<lr>i
\\t%<v>
0<Vmtype>, %<v>
2<Vmtype>, %
3"
4421 [(set_attr "type" "neon_shift_imm<q>")]
4426 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4427 [(set (match_operand:VSDQ_I
0 "register_operand" "=w")
4428 (unspec:VSDQ_I [(match_operand:VSDQ_I
1 "register_operand" "w")
4430 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4433 "<sur>qshl<u>
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %
2"
4434 [(set_attr "type" "neon_sat_shift_imm<q>")]
4440 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4441 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
4442 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI
1 "register_operand" "w")
4444 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4447 "<sur>q<r>shr<u>n
\\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
4448 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.
4456 (define_insn "aarch64_cm<optab><mode>"
4457 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w,w")
4459 (COMPARISONS:<V_INT_EQUIV>
4460 (match_operand:VDQ_I
1 "register_operand" "w,w")
4461 (match_operand:VDQ_I
2 "aarch64_simd_reg_or_zero" "w,ZDz")
4465 cm<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4466 cm<optab>
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, #
0"
4467 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4470 (define_insn_and_split "aarch64_cm<optab>di"
4471 [(set (match_operand:DI
0 "register_operand" "=w,w,r")
4474 (match_operand:DI
1 "register_operand" "w,w,r")
4475 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4477 (clobber (reg:CC CC_REGNUM))]
4480 "&& reload_completed"
4481 [(set (match_operand:DI
0 "register_operand")
4484 (match_operand:DI
1 "register_operand")
4485 (match_operand:DI
2 "aarch64_simd_reg_or_zero")
4488 /* If we are in the general purpose register file,
4489 we split to a sequence of comparison and store. */
4490 if (GP_REGNUM_P (REGNO (operands[
0]))
4491 && GP_REGNUM_P (REGNO (operands[
1])))
4493 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[
1], operands[
2]);
4494 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[
1], operands[
2]);
4495 rtx comparison = gen_rtx_<CMP> (mode, operands[
1], operands[
2]);
4496 emit_insn (gen_cstoredi_neg (operands[
0], comparison, cc_reg));
4499 /* Otherwise, we expand to a similar pattern which does not
4500 clobber CC_REGNUM. */
4502 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4505 (define_insn "*aarch64_cm<optab>di"
4506 [(set (match_operand:DI
0 "register_operand" "=w,w")
4509 (match_operand:DI
1 "register_operand" "w,w")
4510 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w,ZDz")
4512 "TARGET_SIMD && reload_completed"
4514 cm<n_optab>
\t%d0, %d<cmp_1>, %d<cmp_2>
4515 cm<optab>
\t%d0, %d1, #
0"
4516 [(set_attr "type" "neon_compare, neon_compare_zero")]
4521 (define_insn "aarch64_cm<optab><mode>"
4522 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w")
4524 (UCOMPARISONS:<V_INT_EQUIV>
4525 (match_operand:VDQ_I
1 "register_operand" "w")
4526 (match_operand:VDQ_I
2 "register_operand" "w")
4529 "cm<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4530 [(set_attr "type" "neon_compare<q>")]
4533 (define_insn_and_split "aarch64_cm<optab>di"
4534 [(set (match_operand:DI
0 "register_operand" "=w,r")
4537 (match_operand:DI
1 "register_operand" "w,r")
4538 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w,r")
4540 (clobber (reg:CC CC_REGNUM))]
4543 "&& reload_completed"
4544 [(set (match_operand:DI
0 "register_operand")
4547 (match_operand:DI
1 "register_operand")
4548 (match_operand:DI
2 "aarch64_simd_reg_or_zero")
4551 /* If we are in the general purpose register file,
4552 we split to a sequence of comparison and store. */
4553 if (GP_REGNUM_P (REGNO (operands[
0]))
4554 && GP_REGNUM_P (REGNO (operands[
1])))
4556 machine_mode mode = CCmode;
4557 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[
1], operands[
2]);
4558 rtx comparison = gen_rtx_<CMP> (mode, operands[
1], operands[
2]);
4559 emit_insn (gen_cstoredi_neg (operands[
0], comparison, cc_reg));
4562 /* Otherwise, we expand to a similar pattern which does not
4563 clobber CC_REGNUM. */
4565 [(set_attr "type" "neon_compare,multiple")]
4568 (define_insn "*aarch64_cm<optab>di"
4569 [(set (match_operand:DI
0 "register_operand" "=w")
4572 (match_operand:DI
1 "register_operand" "w")
4573 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w")
4575 "TARGET_SIMD && reload_completed"
4576 "cm<n_optab>
\t%d0, %d<cmp_1>, %d<cmp_2>"
4577 [(set_attr "type" "neon_compare")]
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.
4588 (define_insn "aarch64_cmtst<mode>"
4589 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w")
4593 (match_operand:VDQ_I
1 "register_operand" "w")
4594 (match_operand:VDQ_I
2 "register_operand" "w"))
4595 (match_operand:VDQ_I
3 "aarch64_simd_imm_zero"))
4596 (match_operand:<V_INT_EQUIV>
4 "aarch64_simd_imm_minus_one")))
4599 "cmtst
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
4600 [(set_attr "type" "neon_tst<q>")]
4603 (define_insn_and_split "aarch64_cmtstdi"
4604 [(set (match_operand:DI
0 "register_operand" "=w,r")
4608 (match_operand:DI
1 "register_operand" "w,r")
4609 (match_operand:DI
2 "register_operand" "w,r"))
4611 (clobber (reg:CC CC_REGNUM))]
4614 "&& reload_completed"
4615 [(set (match_operand:DI
0 "register_operand")
4619 (match_operand:DI
1 "register_operand")
4620 (match_operand:DI
2 "register_operand"))
4623 /* If we are in the general purpose register file,
4624 we split to a sequence of comparison and store. */
4625 if (GP_REGNUM_P (REGNO (operands[
0]))
4626 && GP_REGNUM_P (REGNO (operands[
1])))
4628 rtx and_tree = gen_rtx_AND (DImode, operands[
1], operands[
2]);
4629 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4630 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4631 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4632 emit_insn (gen_cstoredi_neg (operands[
0], comparison, cc_reg));
4635 /* Otherwise, we expand to a similar pattern which does not
4636 clobber CC_REGNUM. */
4638 [(set_attr "type" "neon_tst,multiple")]
4641 (define_insn "*aarch64_cmtstdi"
4642 [(set (match_operand:DI
0 "register_operand" "=w")
4646 (match_operand:DI
1 "register_operand" "w")
4647 (match_operand:DI
2 "register_operand" "w"))
4650 "cmtst
\t%d0, %d1, %d2"
4651 [(set_attr "type" "neon_tst")]
;; fcm(eq|ge|gt|le|lt)
4656 (define_insn "aarch64_cm<optab><mode>"
4657 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w,w")
4659 (COMPARISONS:<V_INT_EQUIV>
4660 (match_operand:VHSDF_HSDF
1 "register_operand" "w,w")
4661 (match_operand:VHSDF_HSDF
2 "aarch64_simd_reg_or_zero" "w,YDz")
4665 fcm<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4666 fcm<optab>
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>,
0"
4667 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).
4674 (define_insn "aarch64_fac<optab><mode>"
4675 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w")
4677 (FAC_COMPARISONS:<V_INT_EQUIV>
4679 (match_operand:VHSDF_HSDF
1 "register_operand" "w"))
4681 (match_operand:VHSDF_HSDF
2 "register_operand" "w"))
4684 "fac<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4685 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4690 (define_insn "aarch64_addp<mode>"
4691 [(set (match_operand:VD_BHSI
0 "register_operand" "=w")
4693 [(match_operand:VD_BHSI
1 "register_operand" "w")
4694 (match_operand:VD_BHSI
2 "register_operand" "w")]
4697 "addp
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
4698 [(set_attr "type" "neon_reduc_add<q>")]
4701 (define_insn "aarch64_addpdi"
4702 [(set (match_operand:DI
0 "register_operand" "=w")
4704 [(match_operand:V2DI
1 "register_operand" "w")]
4708 [(set_attr "type" "neon_reduc_add")]
4713 (define_expand "sqrt<mode>
2"
4714 [(set (match_operand:VHSDF
0 "register_operand" "=w")
4715 (sqrt:VHSDF (match_operand:VHSDF
1 "register_operand" "w")))]
4718 if (aarch64_emit_approx_sqrt (operands[
0], operands[
1], false))
4722 (define_insn "*sqrt<mode>
2"
4723 [(set (match_operand:VHSDF
0 "register_operand" "=w")
4724 (sqrt:VHSDF (match_operand:VHSDF
1 "register_operand" "w")))]
4726 "fsqrt
\\t%
0.<Vtype>, %
1.<Vtype>"
4727 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
;; Patterns for vector struct loads and stores.
4732 (define_insn "aarch64_simd_ld2<mode>"
4733 [(set (match_operand:OI
0 "register_operand" "=w")
4734 (unspec:OI [(match_operand:OI
1 "aarch64_simd_struct_operand" "Utv")
4735 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4738 "ld2
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
4739 [(set_attr "type" "neon_load2_2reg<q>")]
4742 (define_insn "aarch64_simd_ld2r<mode>"
4743 [(set (match_operand:OI
0 "register_operand" "=w")
4744 (unspec:OI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
4745 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY) ]
4748 "ld2r
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
4749 [(set_attr "type" "neon_load2_all_lanes<q>")]
4752 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4753 [(set (match_operand:OI
0 "register_operand" "=w")
4754 (unspec:OI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
4755 (match_operand:OI
2 "register_operand" "
0")
4756 (match_operand:SI
3 "immediate_operand" "i")
4757 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY) ]
4761 operands[
3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
3]));
4762 return "ld2
\\t{%S0.<Vetype> - %T0.<Vetype>}[%
3], %
1";
4764 [(set_attr "type" "neon_load2_one_lane")]
4767 (define_expand "vec_load_lanesoi<mode>"
4768 [(set (match_operand:OI
0 "register_operand" "=w")
4769 (unspec:OI [(match_operand:OI
1 "aarch64_simd_struct_operand" "Utv")
4770 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4774 if (BYTES_BIG_ENDIAN)
4776 rtx tmp = gen_reg_rtx (OImode);
4777 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4778 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[
1]));
4779 emit_insn (gen_aarch64_rev_reglistoi (operands[
0], tmp, mask));
4782 emit_insn (gen_aarch64_simd_ld2<mode> (operands[
0], operands[
1]));
4786 (define_insn "aarch64_simd_st2<mode>"
4787 [(set (match_operand:OI
0 "aarch64_simd_struct_operand" "=Utv")
4788 (unspec:OI [(match_operand:OI
1 "register_operand" "w")
4789 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4792 "st2
\\t{%S1.<Vtype> - %T1.<Vtype>}, %
0"
4793 [(set_attr "type" "neon_store2_2reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
4797 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4798 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
4799 (unspec:BLK [(match_operand:OI
1 "register_operand" "w")
4800 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)
4801 (match_operand:SI
2 "immediate_operand" "i")]
4805 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
4806 return "st2
\\t{%S1.<Vetype> - %T1.<Vetype>}[%
2], %
0";
4808 [(set_attr "type" "neon_store2_one_lane<q>")]
4811 (define_expand "vec_store_lanesoi<mode>"
4812 [(set (match_operand:OI
0 "aarch64_simd_struct_operand" "=Utv")
4813 (unspec:OI [(match_operand:OI
1 "register_operand" "w")
4814 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4818 if (BYTES_BIG_ENDIAN)
4820 rtx tmp = gen_reg_rtx (OImode);
4821 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4822 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[
1], mask));
4823 emit_insn (gen_aarch64_simd_st2<mode> (operands[
0], tmp));
4826 emit_insn (gen_aarch64_simd_st2<mode> (operands[
0], operands[
1]));
4830 (define_insn "aarch64_simd_ld3<mode>"
4831 [(set (match_operand:CI
0 "register_operand" "=w")
4832 (unspec:CI [(match_operand:CI
1 "aarch64_simd_struct_operand" "Utv")
4833 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4836 "ld3
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
4837 [(set_attr "type" "neon_load3_3reg<q>")]
4840 (define_insn "aarch64_simd_ld3r<mode>"
4841 [(set (match_operand:CI
0 "register_operand" "=w")
4842 (unspec:CI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
4843 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY) ]
4846 "ld3r
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
4847 [(set_attr "type" "neon_load3_all_lanes<q>")]
4850 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4851 [(set (match_operand:CI
0 "register_operand" "=w")
4852 (unspec:CI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
4853 (match_operand:CI
2 "register_operand" "
0")
4854 (match_operand:SI
3 "immediate_operand" "i")
4855 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4859 operands[
3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
3]));
4860 return "ld3
\\t{%S0.<Vetype> - %U0.<Vetype>}[%
3], %
1";
4862 [(set_attr "type" "neon_load3_one_lane")]
4865 (define_expand "vec_load_lanesci<mode>"
4866 [(set (match_operand:CI
0 "register_operand" "=w")
4867 (unspec:CI [(match_operand:CI
1 "aarch64_simd_struct_operand" "Utv")
4868 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4872 if (BYTES_BIG_ENDIAN)
4874 rtx tmp = gen_reg_rtx (CImode);
4875 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4876 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[
1]));
4877 emit_insn (gen_aarch64_rev_reglistci (operands[
0], tmp, mask));
4880 emit_insn (gen_aarch64_simd_ld3<mode> (operands[
0], operands[
1]));
4884 (define_insn "aarch64_simd_st3<mode>"
4885 [(set (match_operand:CI
0 "aarch64_simd_struct_operand" "=Utv")
4886 (unspec:CI [(match_operand:CI
1 "register_operand" "w")
4887 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4890 "st3
\\t{%S1.<Vtype> - %U1.<Vtype>}, %
0"
4891 [(set_attr "type" "neon_store3_3reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
4895 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4896 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
4897 (unspec:BLK [(match_operand:CI
1 "register_operand" "w")
4898 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)
4899 (match_operand:SI
2 "immediate_operand" "i")]
4903 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
4904 return "st3
\\t{%S1.<Vetype> - %U1.<Vetype>}[%
2], %
0";
4906 [(set_attr "type" "neon_store3_one_lane<q>")]
4909 (define_expand "vec_store_lanesci<mode>"
4910 [(set (match_operand:CI
0 "aarch64_simd_struct_operand" "=Utv")
4911 (unspec:CI [(match_operand:CI
1 "register_operand" "w")
4912 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4916 if (BYTES_BIG_ENDIAN)
4918 rtx tmp = gen_reg_rtx (CImode);
4919 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4920 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[
1], mask));
4921 emit_insn (gen_aarch64_simd_st3<mode> (operands[
0], tmp));
4924 emit_insn (gen_aarch64_simd_st3<mode> (operands[
0], operands[
1]));
4928 (define_insn "aarch64_simd_ld4<mode>"
4929 [(set (match_operand:XI
0 "register_operand" "=w")
4930 (unspec:XI [(match_operand:XI
1 "aarch64_simd_struct_operand" "Utv")
4931 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4934 "ld4
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
4935 [(set_attr "type" "neon_load4_4reg<q>")]
4938 (define_insn "aarch64_simd_ld4r<mode>"
4939 [(set (match_operand:XI
0 "register_operand" "=w")
4940 (unspec:XI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
4941 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY) ]
4944 "ld4r
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
4945 [(set_attr "type" "neon_load4_all_lanes<q>")]
4948 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4949 [(set (match_operand:XI
0 "register_operand" "=w")
4950 (unspec:XI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
4951 (match_operand:XI
2 "register_operand" "
0")
4952 (match_operand:SI
3 "immediate_operand" "i")
4953 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4957 operands[
3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
3]));
4958 return "ld4
\\t{%S0.<Vetype> - %V0.<Vetype>}[%
3], %
1";
4960 [(set_attr "type" "neon_load4_one_lane")]
4963 (define_expand "vec_load_lanesxi<mode>"
4964 [(set (match_operand:XI
0 "register_operand" "=w")
4965 (unspec:XI [(match_operand:XI
1 "aarch64_simd_struct_operand" "Utv")
4966 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4970 if (BYTES_BIG_ENDIAN)
4972 rtx tmp = gen_reg_rtx (XImode);
4973 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4974 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[
1]));
4975 emit_insn (gen_aarch64_rev_reglistxi (operands[
0], tmp, mask));
4978 emit_insn (gen_aarch64_simd_ld4<mode> (operands[
0], operands[
1]));
4982 (define_insn "aarch64_simd_st4<mode>"
4983 [(set (match_operand:XI
0 "aarch64_simd_struct_operand" "=Utv")
4984 (unspec:XI [(match_operand:XI
1 "register_operand" "w")
4985 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
4988 "st4
\\t{%S1.<Vtype> - %V1.<Vtype>}, %
0"
4989 [(set_attr "type" "neon_store4_4reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
4993 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4994 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
4995 (unspec:BLK [(match_operand:XI
1 "register_operand" "w")
4996 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)
4997 (match_operand:SI
2 "immediate_operand" "i")]
5001 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
5002 return "st4
\\t{%S1.<Vetype> - %V1.<Vetype>}[%
2], %
0";
5004 [(set_attr "type" "neon_store4_one_lane<q>")]
5007 (define_expand "vec_store_lanesxi<mode>"
5008 [(set (match_operand:XI
0 "aarch64_simd_struct_operand" "=Utv")
5009 (unspec:XI [(match_operand:XI
1 "register_operand" "w")
5010 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5014 if (BYTES_BIG_ENDIAN)
5016 rtx tmp = gen_reg_rtx (XImode);
5017 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5018 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[
1], mask));
5019 emit_insn (gen_aarch64_simd_st4<mode> (operands[
0], tmp));
5022 emit_insn (gen_aarch64_simd_st4<mode> (operands[
0], operands[
1]));
5026 (define_insn_and_split "aarch64_rev_reglist<mode>"
5027 [(set (match_operand:VSTRUCT
0 "register_operand" "=&w")
5029 [(match_operand:VSTRUCT
1 "register_operand" "w")
5030 (match_operand:V16QI
2 "register_operand" "w")]
5031 UNSPEC_REV_REGLIST))]
5034 "&& reload_completed"
5038 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5039 for (i =
0; i < nregs; i++)
5041 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[
0]) + i);
5042 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[
1]) + i);
5043 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[
2]));
5047 [(set_attr "type" "neon_tbl1_q")
5048 (set_attr "length" "<insn_count>")]
;; Reload patterns for AdvSIMD register list operands.
5053 (define_expand "mov<mode>"
5054 [(set (match_operand:VSTRUCT
0 "nonimmediate_operand" "")
5055 (match_operand:VSTRUCT
1 "general_operand" ""))]
5058 if (can_create_pseudo_p ())
5060 if (GET_CODE (operands[
0]) != REG)
5061 operands[
1] = force_reg (<MODE>mode, operands[
1]);
5064 /* If we have a paradoxical subreg trying to write to <MODE> from and the
5065 registers don't overlap then we need to break it apart. What it's trying
5066 to do is give two kind of information at the same time. It's trying to
5067 convey liveness information by saying that the entire register will be
5068 written to eventually, but it also only wants to write a single part of the
5069 register. Hence the paradoxical subreg.
5071 Instead of allowing this we will split the two concerns. The liveness
5072 information will be conveyed using a clobber and then we break apart the
5073 paradoxical subreg into just a normal write of the part that it wanted to
5074 write originally. */
5076 if (REG_P (operands[
0]) && paradoxical_subreg_p (operands[
1]))
5078 if (!reg_overlap_mentioned_p (operands[
0], operands[
1]))
5079 emit_clobber (operands[
0]);
5080 operands[
1] = SUBREG_REG (operands[
1]);
5081 operands[
0] = gen_lowpart (GET_MODE (operands[
1]), operands[
0]);
5086 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5087 [(match_operand:CI
0 "register_operand" "=w")
5088 (match_operand:DI
1 "register_operand" "r")
5089 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5092 rtx mem = gen_rtx_MEM (CImode, operands[
1]);
5093 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[
0], mem));
5097 (define_insn "aarch64_ld1_x3_<mode>"
5098 [(set (match_operand:CI
0 "register_operand" "=w")
5100 [(match_operand:CI
1 "aarch64_simd_struct_operand" "Utv")
5101 (unspec:VALLDIF [(const_int
3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5103 "ld1
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
5104 [(set_attr "type" "neon_load1_3reg<q>")]
5107 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5108 [(match_operand:XI
0 "register_operand" "=w")
5109 (match_operand:DI
1 "register_operand" "r")
5110 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5113 rtx mem = gen_rtx_MEM (XImode, operands[
1]);
5114 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[
0], mem));
5118 (define_insn "aarch64_ld1_x4_<mode>"
5119 [(set (match_operand:XI
0 "register_operand" "=w")
5121 [(match_operand:XI
1 "aarch64_simd_struct_operand" "Utv")
5122 (unspec:VALLDIF [(const_int
4)] UNSPEC_VSTRUCTDUMMY)]
5125 "ld1
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
5126 [(set_attr "type" "neon_load1_4reg<q>")]
5129 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5130 [(match_operand:DI
0 "register_operand" "")
5131 (match_operand:OI
1 "register_operand" "")
5132 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5135 rtx mem = gen_rtx_MEM (OImode, operands[
0]);
5136 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[
1]));
5140 (define_insn "aarch64_st1_x2_<mode>"
5141 [(set (match_operand:OI
0 "aarch64_simd_struct_operand" "=Utv")
5143 [(match_operand:OI
1 "register_operand" "w")
5144 (unspec:VALLDIF [(const_int
2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5146 "st1
\\t{%S1.<Vtype> - %T1.<Vtype>}, %
0"
5147 [(set_attr "type" "neon_store1_2reg<q>")]
5150 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5151 [(match_operand:DI
0 "register_operand" "")
5152 (match_operand:CI
1 "register_operand" "")
5153 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5156 rtx mem = gen_rtx_MEM (CImode, operands[
0]);
5157 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[
1]));
5161 (define_insn "aarch64_st1_x3_<mode>"
5162 [(set (match_operand:CI
0 "aarch64_simd_struct_operand" "=Utv")
5164 [(match_operand:CI
1 "register_operand" "w")
5165 (unspec:VALLDIF [(const_int
3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5167 "st1
\\t{%S1.<Vtype> - %U1.<Vtype>}, %
0"
5168 [(set_attr "type" "neon_store1_3reg<q>")]
5171 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5172 [(match_operand:DI
0 "register_operand" "")
5173 (match_operand:XI
1 "register_operand" "")
5174 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5177 rtx mem = gen_rtx_MEM (XImode, operands[
0]);
5178 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[
1]));
5182 (define_insn "aarch64_st1_x4_<mode>"
5183 [(set (match_operand:XI
0 "aarch64_simd_struct_operand" "=Utv")
5185 [(match_operand:XI
1 "register_operand" "w")
5186 (unspec:VALLDIF [(const_int
4)] UNSPEC_VSTRUCTDUMMY)]
5189 "st1
\\t{%S1.<Vtype> - %V1.<Vtype>}, %
0"
5190 [(set_attr "type" "neon_store1_4reg<q>")]
5193 (define_insn "*aarch64_mov<mode>"
5194 [(set (match_operand:VSTRUCT
0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5195 (match_operand:VSTRUCT
1 "aarch64_simd_general_operand" " w,w,Utv"))]
5196 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5197 && (register_operand (operands[
0], <MODE>mode)
5198 || register_operand (operands[
1], <MODE>mode))"
5201 st1
\\t{%S1.16b - %<Vendreg>
1.16b}, %
0
5202 ld1
\\t{%S0.16b - %<Vendreg>
0.16b}, %
1"
5203 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5204 neon_load<nregs>_<nregs>reg_q")
5205 (set_attr "length" "<insn_count>,
4,
4")]
5208 (define_insn "aarch64_be_ld1<mode>"
5209 [(set (match_operand:VALLDI_F16
0 "register_operand" "=w")
5210 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16
1
5211 "aarch64_simd_struct_operand" "Utv")]
5214 "ld1
\\t{%
0<Vmtype>}, %
1"
5215 [(set_attr "type" "neon_load1_1reg<q>")]
5218 (define_insn "aarch64_be_st1<mode>"
5219 [(set (match_operand:VALLDI_F16
0 "aarch64_simd_struct_operand" "=Utv")
5220 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16
1 "register_operand" "w")]
5223 "st1
\\t{%
1<Vmtype>}, %
0"
5224 [(set_attr "type" "neon_store1_1reg<q>")]
5227 (define_insn "*aarch64_be_movoi"
5228 [(set (match_operand:OI
0 "nonimmediate_operand" "=w,m,w")
5229 (match_operand:OI
1 "general_operand" " w,w,m"))]
5230 "TARGET_SIMD && BYTES_BIG_ENDIAN
5231 && (register_operand (operands[
0], OImode)
5232 || register_operand (operands[
1], OImode))"
5237 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5238 (set_attr "length" "
8,
4,
4")]
5241 (define_insn "*aarch64_be_movci"
5242 [(set (match_operand:CI
0 "nonimmediate_operand" "=w,o,w")
5243 (match_operand:CI
1 "general_operand" " w,w,o"))]
5244 "TARGET_SIMD && BYTES_BIG_ENDIAN
5245 && (register_operand (operands[
0], CImode)
5246 || register_operand (operands[
1], CImode))"
5248 [(set_attr "type" "multiple")
5249 (set_attr "length" "
12,
4,
4")]
5252 (define_insn "*aarch64_be_movxi"
5253 [(set (match_operand:XI
0 "nonimmediate_operand" "=w,o,w")
5254 (match_operand:XI
1 "general_operand" " w,w,o"))]
5255 "TARGET_SIMD && BYTES_BIG_ENDIAN
5256 && (register_operand (operands[
0], XImode)
5257 || register_operand (operands[
1], XImode))"
5259 [(set_attr "type" "multiple")
5260 (set_attr "length" "
16,
4,
4")]
5264 [(set (match_operand:OI
0 "register_operand")
5265 (match_operand:OI
1 "register_operand"))]
5266 "TARGET_SIMD && reload_completed"
5269 aarch64_simd_emit_reg_reg_move (operands, TImode,
2);
5274 [(set (match_operand:CI
0 "nonimmediate_operand")
5275 (match_operand:CI
1 "general_operand"))]
5276 "TARGET_SIMD && reload_completed"
5279 if (register_operand (operands[
0], CImode)
5280 && register_operand (operands[
1], CImode))
5282 aarch64_simd_emit_reg_reg_move (operands, TImode,
3);
5285 else if (BYTES_BIG_ENDIAN)
5287 emit_move_insn (simplify_gen_subreg (OImode, operands[
0], CImode,
0),
5288 simplify_gen_subreg (OImode, operands[
1], CImode,
0));
5289 emit_move_insn (gen_lowpart (V16QImode,
5290 simplify_gen_subreg (TImode, operands[
0],
5292 gen_lowpart (V16QImode,
5293 simplify_gen_subreg (TImode, operands[
1],
5302 [(set (match_operand:XI
0 "nonimmediate_operand")
5303 (match_operand:XI
1 "general_operand"))]
5304 "TARGET_SIMD && reload_completed"
5307 if (register_operand (operands[
0], XImode)
5308 && register_operand (operands[
1], XImode))
5310 aarch64_simd_emit_reg_reg_move (operands, TImode,
4);
5313 else if (BYTES_BIG_ENDIAN)
5315 emit_move_insn (simplify_gen_subreg (OImode, operands[
0], XImode,
0),
5316 simplify_gen_subreg (OImode, operands[
1], XImode,
0));
5317 emit_move_insn (simplify_gen_subreg (OImode, operands[
0], XImode,
32),
5318 simplify_gen_subreg (OImode, operands[
1], XImode,
32));
5325 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5326 [(match_operand:VSTRUCT
0 "register_operand" "=w")
5327 (match_operand:DI
1 "register_operand" "w")
5328 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5331 rtx mem = gen_rtx_MEM (BLKmode, operands[
1]);
5332 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5335 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[
0],
;; Multi-register structure loads into D-register lists (LD2/LD3/LD4).
;; Each size has two variants: the VD iterator emits the real ldN
;; instruction, while the DX iterator (64-bit scalar modes) degenerates
;; to a multi-register ld1 with .1d arrangement.  The BLK memory operand
;; plus the UNSPEC_VSTRUCTDUMMY operand distinguish the element mode.
;; NOTE(review): this extract is line-garbled and each pattern's insn
;; condition and trailing unspec name are missing from the visible text;
;; bodies kept byte-identical pending comparison with upstream GCC.
5340 (define_insn "aarch64_ld2<mode>_dreg"
5341 [(set (match_operand:OI
0 "register_operand" "=w")
5342 (unspec:OI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
5343 (unspec:VD [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5346 "ld2
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
5347 [(set_attr "type" "neon_load2_2reg<q>")]
;; DX variant: two 64-bit scalars -> plain ld1 of a 2-register list.
5350 (define_insn "aarch64_ld2<mode>_dreg"
5351 [(set (match_operand:OI
0 "register_operand" "=w")
5352 (unspec:OI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
5353 (unspec:DX [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5356 "ld1
\\t{%S0.1d - %T0.1d}, %
1"
5357 [(set_attr "type" "neon_load1_2reg<q>")]
5360 (define_insn "aarch64_ld3<mode>_dreg"
5361 [(set (match_operand:CI
0 "register_operand" "=w")
5362 (unspec:CI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
5363 (unspec:VD [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5366 "ld3
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
5367 [(set_attr "type" "neon_load3_3reg<q>")]
5370 (define_insn "aarch64_ld3<mode>_dreg"
5371 [(set (match_operand:CI
0 "register_operand" "=w")
5372 (unspec:CI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
5373 (unspec:DX [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5376 "ld1
\\t{%S0.1d - %U0.1d}, %
1"
5377 [(set_attr "type" "neon_load1_3reg<q>")]
5380 (define_insn "aarch64_ld4<mode>_dreg"
5381 [(set (match_operand:XI
0 "register_operand" "=w")
5382 (unspec:XI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
5383 (unspec:VD [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5386 "ld4
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
5387 [(set_attr "type" "neon_load4_4reg<q>")]
5390 (define_insn "aarch64_ld4<mode>_dreg"
5391 [(set (match_operand:XI
0 "register_operand" "=w")
5392 (unspec:XI [(match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
5393 (unspec:DX [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5396 "ld1
\\t{%S0.1d - %V0.1d}, %
1"
5397 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expanders for structure/vector loads.  Each takes the base
;; address in a DI register (operand 1), wraps it in a MEM of the
;; appropriate mode/size, and emits the matching define_insn.  The BLK
;; MEMs get an explicit set_mem_size (nregs * 8 bytes for D-reg lists)
;; so alias analysis knows the footprint.
5400 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5401 [(match_operand:VSTRUCT
0 "register_operand" "=w")
5402 (match_operand:DI
1 "register_operand" "r")
5403 (unspec:VDC [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5406 rtx mem = gen_rtx_MEM (BLKmode, operands[
1]);
5407 set_mem_size (mem, <VSTRUCT:nregs> *
8);
5409 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[
0], mem));
;; Single-vector load: big-endian needs the dedicated be_ld1 pattern to
;; keep lane numbering consistent; little-endian is a plain move.
5413 (define_expand "aarch64_ld1<VALL_F16:mode>"
5414 [(match_operand:VALL_F16
0 "register_operand")
5415 (match_operand:DI
1 "register_operand")]
5418 machine_mode mode = <VALL_F16:MODE>mode;
5419 rtx mem = gen_rtx_MEM (mode, operands[
1]);
5421 if (BYTES_BIG_ENDIAN)
5422 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[
0], mem));
5424 emit_move_insn (operands[
0], mem);
5428 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5429 [(match_operand:VSTRUCT
0 "register_operand" "=w")
5430 (match_operand:DI
1 "register_operand" "r")
5431 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5434 machine_mode mode = <VSTRUCT:MODE>mode;
5435 rtx mem = gen_rtx_MEM (mode, operands[
1]);
5437 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[
0], mem));
;; ld1 x2 (two-register ld1) expanders; the result mode is always OI.
5441 (define_expand "aarch64_ld1x2<VQ:mode>"
5442 [(match_operand:OI
0 "register_operand" "=w")
5443 (match_operand:DI
1 "register_operand" "r")
5444 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5447 machine_mode mode = OImode;
5448 rtx mem = gen_rtx_MEM (mode, operands[
1]);
5450 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[
0], mem));
5454 (define_expand "aarch64_ld1x2<VDC:mode>"
5455 [(match_operand:OI
0 "register_operand" "=w")
5456 (match_operand:DI
1 "register_operand" "r")
5457 (unspec:VDC [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5460 machine_mode mode = OImode;
5461 rtx mem = gen_rtx_MEM (mode, operands[
1]);
5463 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[
0], mem));
;; Lane-wise structure load: operand 3 is a lane index, range-checked
;; here at expand time with aarch64_simd_lane_bounds.
5468 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5469 [(match_operand:VSTRUCT
0 "register_operand" "=w")
5470 (match_operand:DI
1 "register_operand" "w")
5471 (match_operand:VSTRUCT
2 "register_operand" "
0")
5472 (match_operand:SI
3 "immediate_operand" "i")
5473 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5476 rtx mem = gen_rtx_MEM (BLKmode, operands[
1]);
5477 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5480 aarch64_simd_lane_bounds (operands[
3],
0, <VALLDIF:nunits>, NULL);
5481 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5482 operands[
0], mem, operands[
2], operands[
3]));
5486 ;; Expanders for builtins to extract vector registers from large
5487 ;; opaque integer modes.
;; Extract D-register <part> of a structure value: copy the containing
;; 128-bit chunk (offset = part * 16 bytes) into a temp, then take its
;; low 64-bit half.
5491 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5492 [(match_operand:VDC
0 "register_operand" "=w")
5493 (match_operand:VSTRUCT
1 "register_operand" "w")
5494 (match_operand:SI
2 "immediate_operand" "i")]
5497 int part = INTVAL (operands[
2]);
5498 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5499 int offset = part *
16;
5501 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[
1], offset));
5502 emit_move_insn (operands[
0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extract Q-register <part> directly via a SUBREG at byte offset
;; part * 16 (each Q register is 16 bytes).
5508 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5509 [(match_operand:VQ
0 "register_operand" "=w")
5510 (match_operand:VSTRUCT
1 "register_operand" "w")
5511 (match_operand:SI
2 "immediate_operand" "i")]
5514 int part = INTVAL (operands[
2]);
5515 int offset = part *
16;
5517 emit_move_insn (operands[
0],
5518 gen_rtx_SUBREG (<VQ:MODE>mode, operands[
1], offset));
5522 ;; Permuted-store expanders for neon intrinsics.
5524 ;; Permute instructions
;; Standard vec_perm pattern for byte vectors (VB): defers entirely to
;; aarch64_expand_vec_perm, which picks TBL or a specialised permute.
5528 (define_expand "vec_perm<mode>"
5529 [(match_operand:VB
0 "register_operand")
5530 (match_operand:VB
1 "register_operand")
5531 (match_operand:VB
2 "register_operand")
5532 (match_operand:VB
3 "register_operand")]
5535 aarch64_expand_vec_perm (operands[
0], operands[
1],
5536 operands[
2], operands[
3], <nunits>);
;; Table lookup (TBL) and table extension (TBX) patterns.  TBL writes
;; zero for out-of-range indices; TBX instead leaves the destination
;; lane unchanged, hence the "0" matching constraint tying the inout
;; operand to the result in the tbx/qtbx patterns.  Multi-register
;; tables arrive as opaque OI (2 regs), CI (3 regs) or XI (4 regs).
;; NOTE(review): insn conditions and final unspec names are missing
;; from this garbled extract; bodies kept byte-identical.
5540 (define_insn "aarch64_tbl1<mode>"
5541 [(set (match_operand:VB
0 "register_operand" "=w")
5542 (unspec:VB [(match_operand:V16QI
1 "register_operand" "w")
5543 (match_operand:VB
2 "register_operand" "w")]
5546 "tbl
\\t%
0.<Vtype>, {%
1.16b}, %
2.<Vtype>"
5547 [(set_attr "type" "neon_tbl1<q>")]
5550 ;; Two source registers.
5552 (define_insn "aarch64_tbl2v16qi"
5553 [(set (match_operand:V16QI
0 "register_operand" "=w")
5554 (unspec:V16QI [(match_operand:OI
1 "register_operand" "w")
5555 (match_operand:V16QI
2 "register_operand" "w")]
5558 "tbl
\\t%
0.16b, {%S1.16b - %T1.16b}, %
2.16b"
5559 [(set_attr "type" "neon_tbl2_q")]
5562 (define_insn "aarch64_tbl3<mode>"
5563 [(set (match_operand:VB
0 "register_operand" "=w")
5564 (unspec:VB [(match_operand:OI
1 "register_operand" "w")
5565 (match_operand:VB
2 "register_operand" "w")]
5568 "tbl
\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5569 [(set_attr "type" "neon_tbl3")]
5572 (define_insn "aarch64_tbx4<mode>"
5573 [(set (match_operand:VB
0 "register_operand" "=w")
5574 (unspec:VB [(match_operand:VB
1 "register_operand" "
0")
5575 (match_operand:OI
2 "register_operand" "w")
5576 (match_operand:VB
3 "register_operand" "w")]
5579 "tbx
\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5580 [(set_attr "type" "neon_tbl4")]
5583 ;; Three source registers.
5585 (define_insn "aarch64_qtbl3<mode>"
5586 [(set (match_operand:VB
0 "register_operand" "=w")
5587 (unspec:VB [(match_operand:CI
1 "register_operand" "w")
5588 (match_operand:VB
2 "register_operand" "w")]
5591 "tbl
\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5592 [(set_attr "type" "neon_tbl3")]
5595 (define_insn "aarch64_qtbx3<mode>"
5596 [(set (match_operand:VB
0 "register_operand" "=w")
5597 (unspec:VB [(match_operand:VB
1 "register_operand" "
0")
5598 (match_operand:CI
2 "register_operand" "w")
5599 (match_operand:VB
3 "register_operand" "w")]
5602 "tbx
\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5603 [(set_attr "type" "neon_tbl3")]
5606 ;; Four source registers.
5608 (define_insn "aarch64_qtbl4<mode>"
5609 [(set (match_operand:VB
0 "register_operand" "=w")
5610 (unspec:VB [(match_operand:XI
1 "register_operand" "w")
5611 (match_operand:VB
2 "register_operand" "w")]
5614 "tbl
\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5615 [(set_attr "type" "neon_tbl4")]
5618 (define_insn "aarch64_qtbx4<mode>"
5619 [(set (match_operand:VB
0 "register_operand" "=w")
5620 (unspec:VB [(match_operand:VB
1 "register_operand" "
0")
5621 (match_operand:XI
2 "register_operand" "w")
5622 (match_operand:VB
3 "register_operand" "w")]
5625 "tbx
\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5626 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into one OI value; split after reload
;; into the concrete register moves.
5629 (define_insn_and_split "aarch64_combinev16qi"
5630 [(set (match_operand:OI
0 "register_operand" "=w")
5631 (unspec:OI [(match_operand:V16QI
1 "register_operand" "w")
5632 (match_operand:V16QI
2 "register_operand" "w")]
5636 "&& reload_completed"
5639 aarch64_split_combinev16qi (operands);
5642 [(set_attr "type" "multiple")]
5645 ;; This instruction's pattern is generated directly by
5646 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5647 ;; need corresponding changes there.
5648 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5649 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
5650 (unspec:VALL_F16 [(match_operand:VALL_F16
1 "register_operand" "w")
5651 (match_operand:VALL_F16
2 "register_operand" "w")]
5654 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
5655 [(set_attr "type" "neon_permute<q>")]
5658 ;; This instruction's pattern is generated directly by
5659 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5660 ;; need corresponding changes there. Note that the immediate (third)
5661 ;; operand is a lane index not a byte index.
;; The output routine scales the lane index into EXT's byte immediate
;; (lane * unit size) before printing.
5662 (define_insn "aarch64_ext<mode>"
5663 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
5664 (unspec:VALL_F16 [(match_operand:VALL_F16
1 "register_operand" "w")
5665 (match_operand:VALL_F16
2 "register_operand" "w")
5666 (match_operand:SI
3 "immediate_operand" "i")]
5670 operands[
3] = GEN_INT (INTVAL (operands[
3])
5671 * GET_MODE_UNIT_SIZE (<MODE>mode));
5672 return "ext
\\t%
0.<Vbtype>, %
1.<Vbtype>, %
2.<Vbtype>, #%
3";
5674 [(set_attr "type" "neon_ext<q>")]
5677 ;; This instruction's pattern is generated directly by
5678 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5679 ;; need corresponding changes there.
5680 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5681 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
5682 (unspec:VALL_F16 [(match_operand:VALL_F16
1 "register_operand" "w")]
5685 "rev<REVERSE:rev_op>
\\t%
0.<Vtype>, %
1.<Vtype>"
5686 [(set_attr "type" "neon_rev<q>")]
;; Multi-register structure stores from D-register lists (ST2/ST3/ST4);
;; mirror images of the ldN_dreg patterns above.  As there, the DX
;; (64-bit scalar) variants degenerate to a multi-register st1 with
;; .1d arrangement.  NOTE(review): insn conditions and trailing unspec
;; names are missing from this garbled extract.
5689 (define_insn "aarch64_st2<mode>_dreg"
5690 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
5691 (unspec:BLK [(match_operand:OI
1 "register_operand" "w")
5692 (unspec:VD [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5695 "st2
\\t{%S1.<Vtype> - %T1.<Vtype>}, %
0"
5696 [(set_attr "type" "neon_store2_2reg")]
5699 (define_insn "aarch64_st2<mode>_dreg"
5700 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
5701 (unspec:BLK [(match_operand:OI
1 "register_operand" "w")
5702 (unspec:DX [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5705 "st1
\\t{%S1.1d - %T1.1d}, %
0"
5706 [(set_attr "type" "neon_store1_2reg")]
5709 (define_insn "aarch64_st3<mode>_dreg"
5710 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
5711 (unspec:BLK [(match_operand:CI
1 "register_operand" "w")
5712 (unspec:VD [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5715 "st3
\\t{%S1.<Vtype> - %U1.<Vtype>}, %
0"
5716 [(set_attr "type" "neon_store3_3reg")]
5719 (define_insn "aarch64_st3<mode>_dreg"
5720 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
5721 (unspec:BLK [(match_operand:CI
1 "register_operand" "w")
5722 (unspec:DX [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5725 "st1
\\t{%S1.1d - %U1.1d}, %
0"
5726 [(set_attr "type" "neon_store1_3reg")]
5729 (define_insn "aarch64_st4<mode>_dreg"
5730 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
5731 (unspec:BLK [(match_operand:XI
1 "register_operand" "w")
5732 (unspec:VD [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5735 "st4
\\t{%S1.<Vtype> - %V1.<Vtype>}, %
0"
5736 [(set_attr "type" "neon_store4_4reg")]
5739 (define_insn "aarch64_st4<mode>_dreg"
5740 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
5741 (unspec:BLK [(match_operand:XI
1 "register_operand" "w")
5742 (unspec:DX [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5745 "st1
\\t{%S1.1d - %V1.1d}, %
0"
5746 [(set_attr "type" "neon_store1_4reg")]
;; Builtin expanders for structure/vector stores; mirror images of the
;; load expanders above.  Base address arrives in DI operand 0, data in
;; operand 1; each builds the MEM and forwards to the matching insn.
5749 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5750 [(match_operand:DI
0 "register_operand" "r")
5751 (match_operand:VSTRUCT
1 "register_operand" "w")
5752 (unspec:VDC [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5755 rtx mem = gen_rtx_MEM (BLKmode, operands[
0]);
5756 set_mem_size (mem, <VSTRUCT:nregs> *
8);
5758 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[
1]));
5762 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5763 [(match_operand:DI
0 "register_operand" "r")
5764 (match_operand:VSTRUCT
1 "register_operand" "w")
5765 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5768 machine_mode mode = <VSTRUCT:MODE>mode;
5769 rtx mem = gen_rtx_MEM (mode, operands[
0]);
5771 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[
1]));
;; Lane-wise structure store; operand 2 is the lane index immediate.
5775 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5776 [(match_operand:DI
0 "register_operand" "r")
5777 (match_operand:VSTRUCT
1 "register_operand" "w")
5778 (unspec:VALLDIF [(const_int
0)] UNSPEC_VSTRUCTDUMMY)
5779 (match_operand:SI
2 "immediate_operand")]
5782 rtx mem = gen_rtx_MEM (BLKmode, operands[
0]);
5783 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5786 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5787 mem, operands[
1], operands[
2]));
;; Single-vector store; big-endian uses the dedicated be_st1 pattern
;; (mirrors aarch64_ld1 above), little-endian is a plain move.
5791 (define_expand "aarch64_st1<VALL_F16:mode>"
5792 [(match_operand:DI
0 "register_operand")
5793 (match_operand:VALL_F16
1 "register_operand")]
5796 machine_mode mode = <VALL_F16:MODE>mode;
5797 rtx mem = gen_rtx_MEM (mode, operands[
0]);
5799 if (BYTES_BIG_ENDIAN)
5800 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[
1]));
5802 emit_move_insn (mem, operands[
1]);
5806 ;; Expander for builtins to insert vector registers into large
5807 ;; opaque integer modes.
5809 ;; Q-register list. We don't need a D-reg inserter as we zero
5810 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copies the whole structure (operand 1 -> operand 0) and then
;; overwrites Q-register <part> via a SUBREG at byte offset part * 16.
5812 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5813 [(match_operand:VSTRUCT
0 "register_operand" "+w")
5814 (match_operand:VSTRUCT
1 "register_operand" "
0")
5815 (match_operand:VQ
2 "register_operand" "w")
5816 (match_operand:SI
3 "immediate_operand" "i")]
5819 int part = INTVAL (operands[
3]);
5820 int offset = part *
16;
5822 emit_move_insn (operands[
0], operands[
1]);
5823 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[
0], offset),
5828 ;; Standard pattern name vec_init<mode><Vel>.
;; Delegates all element-wise initialisation strategy to
;; aarch64_expand_vector_init.
5830 (define_expand "vec_init<mode><Vel>"
5831 [(match_operand:VALL_F16
0 "register_operand" "")
5832 (match_operand
1 "" "")]
5835 aarch64_expand_vector_init (operands[
0], operands[
1]);
;; Load-and-replicate: ld1r duplicates one scalar memory element into
;; every lane of the destination vector.
5839 (define_insn "*aarch64_simd_ld1r<mode>"
5840 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
5841 (vec_duplicate:VALL_F16
5842 (match_operand:<VEL>
1 "aarch64_simd_struct_operand" "Utv")))]
5844 "ld1r
\\t{%
0.<Vtype>}, %
1"
5845 [(set_attr "type" "neon_load1_all_lanes")]
;; Two-register ld1 (consecutive, non-interleaved load) into an OI
;; value; VQ and VDC element-mode variants share the same template.
;; NOTE(review): insn conditions/unspec names missing from extract.
5848 (define_insn "aarch64_simd_ld1<mode>_x2"
5849 [(set (match_operand:OI
0 "register_operand" "=w")
5850 (unspec:OI [(match_operand:OI
1 "aarch64_simd_struct_operand" "Utv")
5851 (unspec:VQ [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5854 "ld1
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
5855 [(set_attr "type" "neon_load1_2reg<q>")]
5858 (define_insn "aarch64_simd_ld1<mode>_x2"
5859 [(set (match_operand:OI
0 "register_operand" "=w")
5860 (unspec:OI [(match_operand:OI
1 "aarch64_simd_struct_operand" "Utv")
5861 (unspec:VDC [(const_int
0)] UNSPEC_VSTRUCTDUMMY)]
5864 "ld1
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
5865 [(set_attr "type" "neon_load1_2reg<q>")]
;; Reciprocal (frecpe/frecps) and reciprocal-exponent (frecpx) estimate
;; steps, plus the unsigned integer variant urecpe.  frecps is the
;; Newton-Raphson refinement step paired with frecpe.
5869 (define_insn "aarch64_frecpe<mode>"
5870 [(set (match_operand:VHSDF
0 "register_operand" "=w")
5871 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand" "w")]
5874 "frecpe
\\t%
0.<Vtype>, %
1.<Vtype>"
5875 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar form; <frecp_suffix> selects between the e/x variants.
5878 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5879 [(set (match_operand:GPF_F16
0 "register_operand" "=w")
5880 (unspec:GPF_F16 [(match_operand:GPF_F16
1 "register_operand" "w")]
5883 "frecp<FRECP:frecp_suffix>
\\t%<s>
0, %<s>
1"
5884 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5887 (define_insn "aarch64_frecps<mode>"
5888 [(set (match_operand:VHSDF_HSDF
0 "register_operand" "=w")
5890 [(match_operand:VHSDF_HSDF
1 "register_operand" "w")
5891 (match_operand:VHSDF_HSDF
2 "register_operand" "w")]
5894 "frecps
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
5895 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5898 (define_insn "aarch64_urecpe<mode>"
5899 [(set (match_operand:VDQ_SI
0 "register_operand" "=w")
5900 (unspec:VDQ_SI [(match_operand:VDQ_SI
1 "register_operand" "w")]
5903 "urecpe
\\t%
0.<Vtype>, %
1.<Vtype>"
5904 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5906 ;; Standard pattern name vec_extract<mode><Vel>.
;; Forwards directly to the get_lane pattern; operand 2 is the lane
;; index immediate.
5908 (define_expand "vec_extract<mode><Vel>"
5909 [(match_operand:<VEL>
0 "aarch64_simd_nonimmediate_operand" "")
5910 (match_operand:VALL_F16
1 "register_operand" "")
5911 (match_operand:SI
2 "immediate_operand" "")]
5915 (gen_aarch64_get_lane<mode> (operands[
0], operands[
1], operands[
2]));
;; AES single-round encryption/decryption (aese/aesd) and the mix-
;; columns step (aesmc/aesimc).  Gated on TARGET_AES.
5921 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5922 [(set (match_operand:V16QI
0 "register_operand" "=w")
5923 (unspec:V16QI [(match_operand:V16QI
1 "register_operand" "
0")
5924 (match_operand:V16QI
2 "register_operand" "w")]
5926 "TARGET_SIMD && TARGET_AES"
5927 "aes<aes_op>
\\t%
0.16b, %
2.16b"
5928 [(set_attr "type" "crypto_aese")]
5931 ;; When AES/AESMC fusion is enabled we want the register allocation to
5935 ;; So prefer to tie operand
1 to operand
0 when fusing.
;; Two alternatives: the tied "0" alternative is only enabled when the
;; tune flags request AES/AESMC fusion (see set_attr_alternative below),
;; otherwise the free "w" alternative is used.
5937 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5938 [(set (match_operand:V16QI
0 "register_operand" "=w,w")
5939 (unspec:V16QI [(match_operand:V16QI
1 "register_operand" "
0,w")]
5941 "TARGET_SIMD && TARGET_AES"
5942 "aes<aesmc_op>
\\t%
0.16b, %
1.16b"
5943 [(set_attr "type" "crypto_aesmc")
5944 (set_attr_alternative "enabled"
5945 [(if_then_else (match_test
5946 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5947 (const_string "yes" )
5948 (const_string "no"))
5949 (const_string "yes")])]
;; SHA1 instruction patterns (gated on TARGET_SHA2, which covers the
;; SHA1 instructions too).  sha1h has three forms: plain SI, and two
;; V4SI lane-0 extract forms — the big-endian variant selects lane 3
;; because lane numbering is reversed in BE register layout.
;; NOTE(review): assembly templates/unspec names of the first three
;; patterns are missing from this garbled extract.
5954 (define_insn "aarch64_crypto_sha1hsi"
5955 [(set (match_operand:SI
0 "register_operand" "=w")
5956 (unspec:SI [(match_operand:SI
1
5957 "register_operand" "w")]
5959 "TARGET_SIMD && TARGET_SHA2"
5961 [(set_attr "type" "crypto_sha1_fast")]
5964 (define_insn "aarch64_crypto_sha1hv4si"
5965 [(set (match_operand:SI
0 "register_operand" "=w")
5966 (unspec:SI [(vec_select:SI (match_operand:V4SI
1 "register_operand" "w")
5967 (parallel [(const_int
0)]))]
5969 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5971 [(set_attr "type" "crypto_sha1_fast")]
5974 (define_insn "aarch64_be_crypto_sha1hv4si"
5975 [(set (match_operand:SI
0 "register_operand" "=w")
5976 (unspec:SI [(vec_select:SI (match_operand:V4SI
1 "register_operand" "w")
5977 (parallel [(const_int
3)]))]
5979 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5981 [(set_attr "type" "crypto_sha1_fast")]
;; Schedule-update and hash-update steps; operand 1 is tied to the
;; destination because the instructions read-modify-write it.
5984 (define_insn "aarch64_crypto_sha1su1v4si"
5985 [(set (match_operand:V4SI
0 "register_operand" "=w")
5986 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
5987 (match_operand:V4SI
2 "register_operand" "w")]
5989 "TARGET_SIMD && TARGET_SHA2"
5990 "sha1su1
\\t%
0.4s, %
2.4s"
5991 [(set_attr "type" "crypto_sha1_fast")]
5994 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5995 [(set (match_operand:V4SI
0 "register_operand" "=w")
5996 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
5997 (match_operand:SI
2 "register_operand" "w")
5998 (match_operand:V4SI
3 "register_operand" "w")]
6000 "TARGET_SIMD && TARGET_SHA2"
6001 "sha1<sha1_op>
\\t%q0, %s2, %
3.4s"
6002 [(set_attr "type" "crypto_sha1_slow")]
6005 (define_insn "aarch64_crypto_sha1su0v4si"
6006 [(set (match_operand:V4SI
0 "register_operand" "=w")
6007 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
6008 (match_operand:V4SI
2 "register_operand" "w")
6009 (match_operand:V4SI
3 "register_operand" "w")]
6011 "TARGET_SIMD && TARGET_SHA2"
6012 "sha1su0
\\t%
0.4s, %
2.4s, %
3.4s"
6013 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 hash-update (sha256h/sha256h2) and schedule-update
;; (sha256su0/sha256su1) patterns; all read-modify-write operand 0,
;; hence the tied "0" constraint on operand 1.
6018 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6019 [(set (match_operand:V4SI
0 "register_operand" "=w")
6020 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
6021 (match_operand:V4SI
2 "register_operand" "w")
6022 (match_operand:V4SI
3 "register_operand" "w")]
6024 "TARGET_SIMD && TARGET_SHA2"
6025 "sha256h<sha256_op>
\\t%q0, %q2, %
3.4s"
6026 [(set_attr "type" "crypto_sha256_slow")]
6029 (define_insn "aarch64_crypto_sha256su0v4si"
6030 [(set (match_operand:V4SI
0 "register_operand" "=w")
6031 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
6032 (match_operand:V4SI
2 "register_operand" "w")]
6034 "TARGET_SIMD && TARGET_SHA2"
6035 "sha256su0
\\t%
0.4s, %
2.4s"
6036 [(set_attr "type" "crypto_sha256_fast")]
6039 (define_insn "aarch64_crypto_sha256su1v4si"
6040 [(set (match_operand:V4SI
0 "register_operand" "=w")
6041 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
6042 (match_operand:V4SI
2 "register_operand" "w")
6043 (match_operand:V4SI
3 "register_operand" "w")]
6045 "TARGET_SIMD && TARGET_SHA2"
6046 "sha256su1
\\t%
0.4s, %
2.4s, %
3.4s"
6047 [(set_attr "type" "crypto_sha256_slow")]
;; SHA512 patterns (ARMv8.2-A SHA3 extension, hence TARGET_SHA3):
;; hash-update sha512h/sha512h2 and schedule-update sha512su0/su1.
;; Operand 1 is tied to the destination (read-modify-write).
6052 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6053 [(set (match_operand:V2DI
0 "register_operand" "=w")
6054 (unspec:V2DI [(match_operand:V2DI
1 "register_operand" "
0")
6055 (match_operand:V2DI
2 "register_operand" "w")
6056 (match_operand:V2DI
3 "register_operand" "w")]
6058 "TARGET_SIMD && TARGET_SHA3"
6059 "sha512h<sha512_op>
\\t%q0, %q2, %
3.2d"
6060 [(set_attr "type" "crypto_sha512")]
6063 (define_insn "aarch64_crypto_sha512su0qv2di"
6064 [(set (match_operand:V2DI
0 "register_operand" "=w")
6065 (unspec:V2DI [(match_operand:V2DI
1 "register_operand" "
0")
6066 (match_operand:V2DI
2 "register_operand" "w")]
6068 "TARGET_SIMD && TARGET_SHA3"
6069 "sha512su0
\\t%
0.2d, %
2.2d"
6070 [(set_attr "type" "crypto_sha512")]
6073 (define_insn "aarch64_crypto_sha512su1qv2di"
6074 [(set (match_operand:V2DI
0 "register_operand" "=w")
6075 (unspec:V2DI [(match_operand:V2DI
1 "register_operand" "
0")
6076 (match_operand:V2DI
2 "register_operand" "w")
6077 (match_operand:V2DI
3 "register_operand" "w")]
6079 "TARGET_SIMD && TARGET_SHA3"
6080 "sha512su1
\\t%
0.2d, %
2.2d, %
3.2d"
6081 [(set_attr "type" "crypto_sha512")]
;; SHA3 helper instructions, expressed with real RTL (xor/rotate/and)
;; rather than unspecs: eor3 (three-way XOR), rax1 (XOR with rotate),
;; xar (XOR and rotate by immediate), bcax (bit-clear and XOR).  The
;; "%w" constraint marks commutative operand pairs.
;; NOTE(review): some interior RTL lines are missing from this garbled
;; extract; bodies kept byte-identical.
6086 (define_insn "aarch64_eor3qv8hi"
6087 [(set (match_operand:V8HI
0 "register_operand" "=w")
6090 (match_operand:V8HI
2 "register_operand" "%w")
6091 (match_operand:V8HI
3 "register_operand" "w"))
6092 (match_operand:V8HI
1 "register_operand" "w")))]
6093 "TARGET_SIMD && TARGET_SHA3"
6094 "eor3
\\t%
0.16b, %
1.16b, %
2.16b, %
3.16b"
6095 [(set_attr "type" "crypto_sha3")]
6098 (define_insn "aarch64_rax1qv2di"
6099 [(set (match_operand:V2DI
0 "register_operand" "=w")
6102 (match_operand:V2DI
2 "register_operand" "w")
6104 (match_operand:V2DI
1 "register_operand" "w")))]
6105 "TARGET_SIMD && TARGET_SHA3"
6106 "rax1
\\t%
0.2d, %
1.2d, %
2.2d"
6107 [(set_attr "type" "crypto_sha3")]
6110 (define_insn "aarch64_xarqv2di"
6111 [(set (match_operand:V2DI
0 "register_operand" "=w")
6114 (match_operand:V2DI
1 "register_operand" "%w")
6115 (match_operand:V2DI
2 "register_operand" "w"))
6116 (match_operand:SI
3 "aarch64_simd_shift_imm_di" "Usd")))]
6117 "TARGET_SIMD && TARGET_SHA3"
6118 "xar
\\t%
0.2d, %
1.2d, %
2.2d, %
3"
6119 [(set_attr "type" "crypto_sha3")]
6122 (define_insn "aarch64_bcaxqv8hi"
6123 [(set (match_operand:V8HI
0 "register_operand" "=w")
6126 (not:V8HI (match_operand:V8HI
3 "register_operand" "w"))
6127 (match_operand:V8HI
2 "register_operand" "w"))
6128 (match_operand:V8HI
1 "register_operand" "w")))]
6129 "TARGET_SIMD && TARGET_SHA3"
6130 "bcax
\\t%
0.16b, %
1.16b, %
2.16b, %
3.16b"
6131 [(set_attr "type" "crypto_sha3")]
;; SM3 (Chinese hash) and SM4 (Chinese block cipher) instruction
;; patterns, gated on TARGET_SM4.  Patterns whose operand 1 carries the
;; tied "0" constraint are read-modify-write instructions.
6136 (define_insn "aarch64_sm3ss1qv4si"
6137 [(set (match_operand:V4SI
0 "register_operand" "=w")
6138 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "w")
6139 (match_operand:V4SI
2 "register_operand" "w")
6140 (match_operand:V4SI
3 "register_operand" "w")]
6142 "TARGET_SIMD && TARGET_SM4"
6143 "sm3ss1
\\t%
0.4s, %
1.4s, %
2.4s, %
3.4s"
6144 [(set_attr "type" "crypto_sm3")]
;; sm3tt takes an extra 2-bit immediate selecting the lane of operand 3.
6148 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6149 [(set (match_operand:V4SI
0 "register_operand" "=w")
6150 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
6151 (match_operand:V4SI
2 "register_operand" "w")
6152 (match_operand:V4SI
3 "register_operand" "w")
6153 (match_operand:SI
4 "aarch64_imm2" "Ui2")]
6155 "TARGET_SIMD && TARGET_SM4"
6156 "sm3tt<sm3tt_op>
\\t%
0.4s, %
2.4s, %
3.4s[%
4]"
6157 [(set_attr "type" "crypto_sm3")]
6160 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6161 [(set (match_operand:V4SI
0 "register_operand" "=w")
6162 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
6163 (match_operand:V4SI
2 "register_operand" "w")
6164 (match_operand:V4SI
3 "register_operand" "w")]
6166 "TARGET_SIMD && TARGET_SM4"
6167 "sm3partw<sm3part_op>
\\t%
0.4s, %
2.4s, %
3.4s"
6168 [(set_attr "type" "crypto_sm3")]
6173 (define_insn "aarch64_sm4eqv4si"
6174 [(set (match_operand:V4SI
0 "register_operand" "=w")
6175 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
6176 (match_operand:V4SI
2 "register_operand" "w")]
6178 "TARGET_SIMD && TARGET_SM4"
6179 "sm4e
\\t%
0.4s, %
2.4s"
6180 [(set_attr "type" "crypto_sm4")]
6183 (define_insn "aarch64_sm4ekeyqv4si"
6184 [(set (match_operand:V4SI
0 "register_operand" "=w")
6185 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "w")
6186 (match_operand:V4SI
2 "register_operand" "w")]
6188 "TARGET_SIMD && TARGET_SM4"
6189 "sm4ekey
\\t%
0.4s, %
1.4s, %
2.4s"
6190 [(set_attr "type" "crypto_sm4")]
;; FP16 fused multiply-accumulate long (FMLAL/FMLSL, ARMv8.2-A
;; FP16FML): widen half-precision products into single-precision
;; accumulators.  The expanders build lo/hi half-selection parallels
;; (aarch64_simd_vect_par_cnst_half) and forward to the define_insns,
;; which express the widening as float_extend of vec_selects over the
;; half vectors.  _low selects the low half of both multiplicands,
;; _high the high half (fmlal2/fmlsl2 instructions).
6195 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6196 [(set (match_operand:VDQSF
0 "register_operand" "=w")
6198 [(match_operand:VDQSF
1 "register_operand" "
0")
6199 (match_operand:<VFMLA_W>
2 "register_operand" "w")
6200 (match_operand:<VFMLA_W>
3 "register_operand" "w")]
6204 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6205 <nunits> *
2, false);
6206 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6207 <nunits> *
2, false);
6209 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[
0],
6218 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6219 [(set (match_operand:VDQSF
0 "register_operand" "=w")
6221 [(match_operand:VDQSF
1 "register_operand" "
0")
6222 (match_operand:<VFMLA_W>
2 "register_operand" "w")
6223 (match_operand:<VFMLA_W>
3 "register_operand" "w")]
6227 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> *
2, true);
6228 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> *
2, true);
6230 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[
0],
;; fmlal: add the widened product to the accumulator (operand 1, tied
;; to the destination).
6238 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6239 [(set (match_operand:VDQSF
0 "register_operand" "=w")
6242 (vec_select:<VFMLA_SEL_W>
6243 (match_operand:<VFMLA_W>
2 "register_operand" "w")
6244 (match_operand:<VFMLA_W>
4 "vect_par_cnst_lo_half" "")))
6246 (vec_select:<VFMLA_SEL_W>
6247 (match_operand:<VFMLA_W>
3 "register_operand" "w")
6248 (match_operand:<VFMLA_W>
5 "vect_par_cnst_lo_half" "")))
6249 (match_operand:VDQSF
1 "register_operand" "
0")))]
6251 "fmlal
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
6252 [(set_attr "type" "neon_fp_mul_s")]
;; fmlsl: as fmlal but the first multiplicand is negated (subtract).
6255 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6256 [(set (match_operand:VDQSF
0 "register_operand" "=w")
6260 (vec_select:<VFMLA_SEL_W>
6261 (match_operand:<VFMLA_W>
2 "register_operand" "w")
6262 (match_operand:<VFMLA_W>
4 "vect_par_cnst_lo_half" ""))))
6264 (vec_select:<VFMLA_SEL_W>
6265 (match_operand:<VFMLA_W>
3 "register_operand" "w")
6266 (match_operand:<VFMLA_W>
5 "vect_par_cnst_lo_half" "")))
6267 (match_operand:VDQSF
1 "register_operand" "
0")))]
6269 "fmlsl
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
6270 [(set_attr "type" "neon_fp_mul_s")]
6273 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6274 [(set (match_operand:VDQSF
0 "register_operand" "=w")
6277 (vec_select:<VFMLA_SEL_W>
6278 (match_operand:<VFMLA_W>
2 "register_operand" "w")
6279 (match_operand:<VFMLA_W>
4 "vect_par_cnst_hi_half" "")))
6281 (vec_select:<VFMLA_SEL_W>
6282 (match_operand:<VFMLA_W>
3 "register_operand" "w")
6283 (match_operand:<VFMLA_W>
5 "vect_par_cnst_hi_half" "")))
6284 (match_operand:VDQSF
1 "register_operand" "
0")))]
6286 "fmlal2
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
6287 [(set_attr "type" "neon_fp_mul_s")]
6290 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6291 [(set (match_operand:VDQSF
0 "register_operand" "=w")
6295 (vec_select:<VFMLA_SEL_W>
6296 (match_operand:<VFMLA_W>
2 "register_operand" "w")
6297 (match_operand:<VFMLA_W>
4 "vect_par_cnst_hi_half" ""))))
6299 (vec_select:<VFMLA_SEL_W>
6300 (match_operand:<VFMLA_W>
3 "register_operand" "w")
6301 (match_operand:<VFMLA_W>
5 "vect_par_cnst_hi_half" "")))
6302 (match_operand:VDQSF
1 "register_operand" "
0")))]
6304 "fmlsl2
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
6305 [(set_attr "type" "neon_fp_mul_s")]
;; Lane forms of FMLAL/FMLSL for the 64-bit V2SF accumulator: one
;; multiplicand is a single broadcast lane of a V4HF register (2-bit
;; lane index, aarch64_imm2).  The expanders compute the half-vector
;; parallel and the endian-corrected lane number, then forward to the
;; define_insns below.
6308 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6309 [(set (match_operand:V2SF
0 "register_operand" "")
6310 (unspec:V2SF [(match_operand:V2SF
1 "register_operand" "")
6311 (match_operand:V4HF
2 "register_operand" "")
6312 (match_operand:V4HF
3 "register_operand" "")
6313 (match_operand:SI
4 "aarch64_imm2" "")]
6317 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, false);
6318 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
6320 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[
0],
6329 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6330 [(set (match_operand:V2SF
0 "register_operand" "")
6331 (unspec:V2SF [(match_operand:V2SF
1 "register_operand" "")
6332 (match_operand:V4HF
2 "register_operand" "")
6333 (match_operand:V4HF
3 "register_operand" "")
6334 (match_operand:SI
4 "aarch64_imm2" "")]
6338 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, true);
6339 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
6341 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[
0],
;; Insns: operand 3 uses the "x" constraint (V0-V15) because the lane
;; form of the instruction can only index the lower register file.
6349 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6350 [(set (match_operand:V2SF
0 "register_operand" "=w")
6354 (match_operand:V4HF
2 "register_operand" "w")
6355 (match_operand:V4HF
4 "vect_par_cnst_lo_half" "")))
6359 (match_operand:V4HF
3 "register_operand" "x")
6360 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6361 (match_operand:V2SF
1 "register_operand" "
0")))]
6363 "fmlal
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6364 [(set_attr "type" "neon_fp_mul_s")]
6367 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6368 [(set (match_operand:V2SF
0 "register_operand" "=w")
6373 (match_operand:V4HF
2 "register_operand" "w")
6374 (match_operand:V4HF
4 "vect_par_cnst_lo_half" ""))))
6378 (match_operand:V4HF
3 "register_operand" "x")
6379 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6380 (match_operand:V2SF
1 "register_operand" "
0")))]
6382 "fmlsl
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6383 [(set_attr "type" "neon_fp_mul_s")]
6386 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6387 [(set (match_operand:V2SF
0 "register_operand" "=w")
6391 (match_operand:V4HF
2 "register_operand" "w")
6392 (match_operand:V4HF
4 "vect_par_cnst_hi_half" "")))
6396 (match_operand:V4HF
3 "register_operand" "x")
6397 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6398 (match_operand:V2SF
1 "register_operand" "
0")))]
6400 "fmlal2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6401 [(set_attr "type" "neon_fp_mul_s")]
6404 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6405 [(set (match_operand:V2SF
0 "register_operand" "=w")
6410 (match_operand:V4HF
2 "register_operand" "w")
6411 (match_operand:V4HF
4 "vect_par_cnst_hi_half" ""))))
6415 (match_operand:V4HF
3 "register_operand" "x")
6416 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6417 (match_operand:V2SF
1 "register_operand" "
0")))]
6419 "fmlsl2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6420 [(set_attr "type" "neon_fp_mul_s")]
6423 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6424 [(set (match_operand:V4SF
0 "register_operand" "")
6425 (unspec:V4SF [(match_operand:V4SF
1 "register_operand" "")
6426 (match_operand:V8HF
2 "register_operand" "")
6427 (match_operand:V8HF
3 "register_operand" "")
6428 (match_operand:SI
4 "aarch64_lane_imm3" "")]
6432 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, false);
6433 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
6435 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[
0],
6443 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6444 [(set (match_operand:V4SF
0 "register_operand" "")
6445 (unspec:V4SF [(match_operand:V4SF
1 "register_operand" "")
6446 (match_operand:V8HF
2 "register_operand" "")
6447 (match_operand:V8HF
3 "register_operand" "")
6448 (match_operand:SI
4 "aarch64_lane_imm3" "")]
6452 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, true);
6453 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
6455 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[
0],
;; FMLAL (vector, by element), 128-bit form: multiplies the low half of the
;; V8HF operand 2 by half-precision lane %5 of the full V8HF operand 3
;; (lane index 0-7, predicate "aarch64_lane_imm3", constraint "Ui7"; "x"
;; restricts the lane register as required for by-element forms), widening
;; into the V4SF accumulator operand 1 tied to the destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6463 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6464 [(set (match_operand:V4SF
0 "register_operand" "=w")
6468 (match_operand:V8HF
2 "register_operand" "w")
6469 (match_operand:V8HF
4 "vect_par_cnst_lo_half" "")))
6473 (match_operand:V8HF
3 "register_operand" "x")
6474 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6475 (match_operand:V4SF
1 "register_operand" "
0")))]
6477 "fmlal
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6478 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element), 128-bit form: subtract variant of the pattern
;; above -- low half of V8HF operand 2 times lane %5 of V8HF operand 3,
;; widened and combined with the V4SF accumulator tied to the destination.
;; (The extra ")" depth vs. the fmlal twin matches the extra negation
;; wrapper the subtract form carries in the full source.)
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6481 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6482 [(set (match_operand:V4SF
0 "register_operand" "=w")
6487 (match_operand:V8HF
2 "register_operand" "w")
6488 (match_operand:V8HF
4 "vect_par_cnst_lo_half" ""))))
6492 (match_operand:V8HF
3 "register_operand" "x")
6493 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6494 (match_operand:V4SF
1 "register_operand" "
0")))]
6496 "fmlsl
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6497 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element), 128-bit form: same as the low-half laneq
;; pattern but selects the high half of V8HF operand 2
;; ("vect_par_cnst_hi_half") and therefore emits the "2" mnemonic.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6500 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6501 [(set (match_operand:V4SF
0 "register_operand" "=w")
6505 (match_operand:V8HF
2 "register_operand" "w")
6506 (match_operand:V8HF
4 "vect_par_cnst_hi_half" "")))
6510 (match_operand:V8HF
3 "register_operand" "x")
6511 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6512 (match_operand:V4SF
1 "register_operand" "
0")))]
6514 "fmlal2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6515 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element), 128-bit form: subtract variant on the high
;; half of V8HF operand 2, lane %5 of V8HF operand 3, V4SF accumulator tied
;; to the destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6518 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6519 [(set (match_operand:V4SF
0 "register_operand" "=w")
6524 (match_operand:V8HF
2 "register_operand" "w")
6525 (match_operand:V8HF
4 "vect_par_cnst_hi_half" ""))))
6529 (match_operand:V8HF
3 "register_operand" "x")
6530 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6531 (match_operand:V4SF
1 "register_operand" "
0")))]
6533 "fmlsl2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6534 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the 64-bit low-half FMLAL/FMLSL by-laneq builtins: V4HF
;; multiplicand (half split via V4HFmode / 4), but the lane comes from a
;; full V8HF operand 3, hence the lane is endian-corrected in V8HFmode and
;; may range 0-7 ("aarch64_lane_imm3").
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; lines; bytes kept identical.
6537 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6538 [(set (match_operand:V2SF
0 "register_operand" "")
6539 (unspec:V2SF [(match_operand:V2SF
1 "register_operand" "")
6540 (match_operand:V4HF
2 "register_operand" "")
6541 (match_operand:V8HF
3 "register_operand" "")
6542 (match_operand:SI
4 "aarch64_lane_imm3" "")]
6546 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, false);
6547 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
6549 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[
0],
;; Expander for the 64-bit high-half FMLAL2/FMLSL2 by-laneq builtins:
;; identical to the *_lowv2sf expander above except the half-selection
;; parallel picks the high half (third argument true).
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; lines; bytes kept identical.
6558 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6559 [(set (match_operand:V2SF
0 "register_operand" "")
6560 (unspec:V2SF [(match_operand:V2SF
1 "register_operand" "")
6561 (match_operand:V4HF
2 "register_operand" "")
6562 (match_operand:V8HF
3 "register_operand" "")
6563 (match_operand:SI
4 "aarch64_lane_imm3" "")]
6567 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, true);
6568 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
6570 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[
0],
;; FMLAL (vector, by element), 64-bit form with a q-register lane source:
;; low half of V4HF operand 2 times lane %5 (0-7) of the full V8HF
;; operand 3, widened into the V2SF accumulator tied to the destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6579 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6580 [(set (match_operand:V2SF
0 "register_operand" "=w")
6584 (match_operand:V4HF
2 "register_operand" "w")
6585 (match_operand:V4HF
4 "vect_par_cnst_lo_half" "")))
6589 (match_operand:V8HF
3 "register_operand" "x")
6590 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6591 (match_operand:V2SF
1 "register_operand" "
0")))]
6593 "fmlal
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6594 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element), 64-bit form with a q-register lane source:
;; subtract variant -- low half of V4HF operand 2 times lane %5 of V8HF
;; operand 3, combined with the V2SF accumulator tied to the destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6597 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6598 [(set (match_operand:V2SF
0 "register_operand" "=w")
6603 (match_operand:V4HF
2 "register_operand" "w")
6604 (match_operand:V4HF
4 "vect_par_cnst_lo_half" ""))))
6608 (match_operand:V8HF
3 "register_operand" "x")
6609 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6610 (match_operand:V2SF
1 "register_operand" "
0")))]
6612 "fmlsl
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6613 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element), 64-bit form with a q-register lane source:
;; high half of V4HF operand 2 ("vect_par_cnst_hi_half") times lane %5 of
;; V8HF operand 3, V2SF accumulator tied to the destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6616 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6617 [(set (match_operand:V2SF
0 "register_operand" "=w")
6621 (match_operand:V4HF
2 "register_operand" "w")
6622 (match_operand:V4HF
4 "vect_par_cnst_hi_half" "")))
6626 (match_operand:V8HF
3 "register_operand" "x")
6627 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6628 (match_operand:V2SF
1 "register_operand" "
0")))]
6630 "fmlal2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6631 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element), 64-bit form with a q-register lane source:
;; subtract variant on the high half of V4HF operand 2, lane %5 of V8HF
;; operand 3, V2SF accumulator tied to the destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6634 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6635 [(set (match_operand:V2SF
0 "register_operand" "=w")
6640 (match_operand:V4HF
2 "register_operand" "w")
6641 (match_operand:V4HF
4 "vect_par_cnst_hi_half" ""))))
6645 (match_operand:V8HF
3 "register_operand" "x")
6646 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
6647 (match_operand:V2SF
1 "register_operand" "
0")))]
6649 "fmlsl2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
6650 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the 128-bit low-half FMLAL/FMLSL by-lane (d-register)
;; builtins: the lane source operand 3 is V4HF, so the lane index is
;; limited to 0-3 ("aarch64_imm2") and endian-corrected in V4HFmode, while
;; the V8HF multiplicand is still halved via V8HFmode / 8.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; lines; bytes kept identical.
6653 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6654 [(set (match_operand:V4SF
0 "register_operand" "")
6655 (unspec:V4SF [(match_operand:V4SF
1 "register_operand" "")
6656 (match_operand:V8HF
2 "register_operand" "")
6657 (match_operand:V4HF
3 "register_operand" "")
6658 (match_operand:SI
4 "aarch64_imm2" "")]
6662 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, false);
6663 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
6665 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[
0],
;; Expander for the 128-bit high-half FMLAL2/FMLSL2 by-lane (d-register)
;; builtins: identical to the *_lowv4sf lane expander above except the half
;; selection picks the high half (third argument true).
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; lines; bytes kept identical.
6673 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6674 [(set (match_operand:V4SF
0 "register_operand" "")
6675 (unspec:V4SF [(match_operand:V4SF
1 "register_operand" "")
6676 (match_operand:V8HF
2 "register_operand" "")
6677 (match_operand:V4HF
3 "register_operand" "")
6678 (match_operand:SI
4 "aarch64_imm2" "")]
6682 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, true);
6683 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
6685 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[
0],
;; FMLAL (vector, by element), 128-bit form with a d-register lane source:
;; low half of V8HF operand 2 times lane %5 (0-3, "aarch64_imm2"/"Ui2") of
;; V4HF operand 3, widened into the V4SF accumulator tied to the
;; destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6693 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6694 [(set (match_operand:V4SF
0 "register_operand" "=w")
6698 (match_operand:V8HF
2 "register_operand" "w")
6699 (match_operand:V8HF
4 "vect_par_cnst_lo_half" "")))
6703 (match_operand:V4HF
3 "register_operand" "x")
6704 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6705 (match_operand:V4SF
1 "register_operand" "
0")))]
6707 "fmlal
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6708 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element), 128-bit form with a d-register lane source:
;; subtract variant -- low half of V8HF operand 2 times lane %5 (0-3) of
;; V4HF operand 3, combined with the V4SF accumulator tied to the
;; destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6711 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6712 [(set (match_operand:V4SF
0 "register_operand" "=w")
6717 (match_operand:V8HF
2 "register_operand" "w")
6718 (match_operand:V8HF
4 "vect_par_cnst_lo_half" ""))))
6722 (match_operand:V4HF
3 "register_operand" "x")
6723 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6724 (match_operand:V4SF
1 "register_operand" "
0")))]
6726 "fmlsl
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6727 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element), 128-bit form with a d-register lane source:
;; high half of V8HF operand 2 times lane %5 (0-3) of V4HF operand 3, V4SF
;; accumulator tied to the destination.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6730 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6731 [(set (match_operand:V4SF
0 "register_operand" "=w")
6735 (match_operand:V8HF
2 "register_operand" "w")
6736 (match_operand:V8HF
4 "vect_par_cnst_hi_half" "")))
6740 (match_operand:V4HF
3 "register_operand" "x")
6741 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6742 (match_operand:V4SF
1 "register_operand" "
0")))]
6744 "fmlal2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6745 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element), 128-bit form with a d-register lane source:
;; subtract variant on the high half of V8HF operand 2, lane %5 (0-3) of
;; V4HF operand 3, V4SF accumulator tied to the destination.  Last of the
;; ARMv8.2-A FP16 FML* by-element pattern family in this file.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; RTL; bytes kept identical.
6748 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6749 [(set (match_operand:V4SF
0 "register_operand" "=w")
6754 (match_operand:V8HF
2 "register_operand" "w")
6755 (match_operand:V8HF
4 "vect_par_cnst_hi_half" ""))))
6759 (match_operand:V4HF
3 "register_operand" "x")
6760 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
6761 (match_operand:V4SF
1 "register_operand" "
0")))]
6763 "fmlsl2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
6764 [(set_attr "type" "neon_fp_mul_s")]
;; PMULL (crypto polynomial multiply long): 64x64 -> 128-bit carry-less
;; multiply of the DI operands 1 and 2 into a TI result, modelled as an
;; unspec.  Gated on TARGET_SIMD && TARGET_AES since the 1q form requires
;; the AES/PMULL crypto extension.
;; NOTE(review): lossy extraction -- fused line numbers, missing interior
;; lines (unspec name, closing paren); bytes kept identical.
6769 (define_insn "aarch64_crypto_pmulldi"
6770 [(set (match_operand:TI
0 "register_operand" "=w")
6771 (unspec:TI [(match_operand:DI
1 "register_operand" "w")
6772 (match_operand:DI
2 "register_operand" "w")]
6774 "TARGET_SIMD && TARGET_AES"
6775 "pmull
\\t%
0.1q, %
1.1d, %
2.1d"
6776 [(set_attr "type" "crypto_pmull")]
6779 (define_insn "aarch64_crypto_pmullv2di"
6780 [(set (match_operand:TI
0 "register_operand" "=w")
6781 (unspec:TI [(match_operand:V2DI
1 "register_operand" "w")
6782 (match_operand:V2DI
2 "register_operand" "w")]
6784 "TARGET_SIMD && TARGET_AES"
6785 "pmull2
\\t%
0.1q, %
1.2d, %
2.2d"
6786 [(set_attr "type" "crypto_pmull")]