1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2023 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The following define_subst rules are used to produce patterns representing
22 ;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
23 ;; a vec_concat with zeroes. The order of the vec_concat operands differs
24 ;; for big-endian so we have a separate define_subst rule for each endianness.
25 (define_subst "add_vec_concat_subst_le"
26 [(set (match_operand:VDZ
0)
27 (match_operand:VDZ
1))]
29 [(set (match_operand:<VDBL>
0 "register_operand" "=w")
32 (match_operand:VDZ
2 "aarch64_simd_or_scalar_imm_zero")))])
34 (define_subst "add_vec_concat_subst_be"
35 [(set (match_operand:VDZ
0)
36 (match_operand:VDZ
1))]
38 [(set (match_operand:<VDBL>
0 "register_operand" "=w")
40 (match_operand:VDZ
2 "aarch64_simd_or_scalar_imm_zero")
43 ;; The subst_attr definitions used to annotate patterns further in the file.
44 ;; Patterns that need to have the above substitutions added to them should
45 ;; have <vczle><vczbe> added to their name.
46 (define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
47 (define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
49 (define_expand "mov<mode>"
50 [(set (match_operand:VALL_F16
0 "nonimmediate_operand")
51 (match_operand:VALL_F16
1 "general_operand"))]
54 /* Force the operand into a register if it is not an
55 immediate whose use can be replaced with xzr.
56 If the mode is
16 bytes wide, then we will be doing
57 a stp in DI mode, so we check the validity of that.
58 If the mode is
8 bytes wide, then we will do doing a
59 normal str, so the check need not apply. */
60 if (GET_CODE (operands[
0]) == MEM
61 && !(aarch64_simd_imm_zero (operands[
1], <MODE>mode)
62 && ((known_eq (GET_MODE_SIZE (<MODE>mode),
16)
63 && aarch64_mem_pair_operand (operands[
0], DImode))
64 || known_eq (GET_MODE_SIZE (<MODE>mode),
8))))
65 operands[
1] = force_reg (<MODE>mode, operands[
1]);
67 /* If a constant is too complex to force to memory (e.g. because it
68 contains CONST_POLY_INTs), build it up from individual elements instead.
69 We should only need to do this before RA; aarch64_legitimate_constant_p
70 should ensure that we don't try to rematerialize the constant later. */
71 if (GET_CODE (operands[
1]) == CONST_VECTOR
72 && targetm.cannot_force_const_mem (<MODE>mode, operands[
1]))
74 aarch64_expand_vector_init (operands[
0], operands[
1]);
80 (define_expand "movmisalign<mode>"
81 [(set (match_operand:VALL_F16
0 "nonimmediate_operand")
82 (match_operand:VALL_F16
1 "general_operand"))]
83 "TARGET_FLOAT && !STRICT_ALIGNMENT"
85 /* This pattern is not permitted to fail during expansion: if both arguments
86 are non-registers (e.g. memory := constant, which can be created by the
87 auto-vectorizer), force operand
1 into a register. */
88 if (!register_operand (operands[
0], <MODE>mode)
89 && !register_operand (operands[
1], <MODE>mode))
90 operands[
1] = force_reg (<MODE>mode, operands[
1]);
93 (define_insn "aarch64_simd_dup<mode>"
94 [(set (match_operand:VDQ_I
0 "register_operand" "=w, w")
96 (match_operand:<VEL>
1 "register_operand" "w,?r")))]
99 dup
\\t%
0.<Vtype>, %
1.<Vetype>[
0]
100 dup
\\t%
0.<Vtype>, %<vwcore>
1"
101 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
104 (define_insn "aarch64_simd_dup<mode>"
105 [(set (match_operand:VDQF_F16
0 "register_operand" "=w,w")
106 (vec_duplicate:VDQF_F16
107 (match_operand:<VEL>
1 "register_operand" "w,r")))]
110 dup
\\t%
0.<Vtype>, %
1.<Vetype>[
0]
111 dup
\\t%
0.<Vtype>, %<vwcore>
1"
112 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
115 (define_insn "aarch64_dup_lane<mode>"
116 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
117 (vec_duplicate:VALL_F16
119 (match_operand:VALL_F16
1 "register_operand" "w")
120 (parallel [(match_operand:SI
2 "immediate_operand" "i")])
124 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
125 return "dup
\\t%
0.<Vtype>, %
1.<Vetype>[%
2]";
127 [(set_attr "type" "neon_dup<q>")]
130 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
131 [(set (match_operand:VALL_F16_NO_V2Q
0 "register_operand" "=w")
132 (vec_duplicate:VALL_F16_NO_V2Q
134 (match_operand:<VSWAP_WIDTH>
1 "register_operand" "w")
135 (parallel [(match_operand:SI
2 "immediate_operand" "i")])
139 operands[
2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[
2]));
140 return "dup
\\t%
0.<Vtype>, %
1.<Vetype>[%
2]";
142 [(set_attr "type" "neon_dup<q>")]
145 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
146 [(set (match_operand:VDMOV
0 "nonimmediate_operand"
147 "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
148 (match_operand:VDMOV
1 "general_operand"
149 "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
151 && (register_operand (operands[
0], <MODE>mode)
152 || aarch64_simd_reg_or_zero (operands[
1], <MODE>mode))"
159 * return TARGET_SIMD ?
\"mov
\t%
0.<Vbtype>, %
1.<Vbtype>
\" :
\"fmov
\t%d0, %d1
\";
160 * return TARGET_SIMD ?
\"umov
\t%
0, %
1.d[
0]
\" :
\"fmov
\t%x0, %d1
\";
163 * return aarch64_output_simd_mov_immediate (operands[
1],
64);
165 [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
166 store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
167 mov_reg, neon_move<q>, f_mcr")
168 (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
171 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
172 [(set (match_operand:VQMOV
0 "nonimmediate_operand"
173 "=w, Umn, m, w, ?r, ?w, ?r, w, w")
174 (match_operand:VQMOV
1 "general_operand"
175 "m, Dz, w, w, w, r, r, Dn, Dz"))]
177 && (register_operand (operands[
0], <MODE>mode)
178 || aarch64_simd_reg_or_zero (operands[
1], <MODE>mode))"
183 mov
\t%
0.<Vbtype>, %
1.<Vbtype>
187 * return aarch64_output_simd_mov_immediate (operands[
1],
128);
189 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
190 neon_logic<q>, multiple, multiple,\
191 multiple, neon_move<q>, fmov")
192 (set_attr "length" "
4,
4,
4,
4,
8,
8,
8,
4,
4")
193 (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
196 ;; When storing lane zero we can use the normal STR and its more permissive
199 (define_insn "aarch64_store_lane0<mode>"
200 [(set (match_operand:<VEL>
0 "memory_operand" "=m")
201 (vec_select:<VEL> (match_operand:VALL_F16
1 "register_operand" "w")
202 (parallel [(match_operand
2 "const_int_operand" "n")])))]
204 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[
2])) ==
0"
205 "str
\\t%<Vetype>
1, %
0"
206 [(set_attr "type" "neon_store1_1reg<q>")]
209 (define_insn "load_pair<DREG:mode><DREG2:mode>"
210 [(set (match_operand:DREG
0 "register_operand" "=w,r")
211 (match_operand:DREG
1 "aarch64_mem_pair_operand" "Ump,Ump"))
212 (set (match_operand:DREG2
2 "register_operand" "=w,r")
213 (match_operand:DREG2
3 "memory_operand" "m,m"))]
215 && rtx_equal_p (XEXP (operands[
3],
0),
216 plus_constant (Pmode,
217 XEXP (operands[
1],
0),
218 GET_MODE_SIZE (<DREG:MODE>mode)))"
222 [(set_attr "type" "neon_ldp,load_16")]
225 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
226 [(set (match_operand:DREG
0 "aarch64_mem_pair_operand" "=Ump,Ump")
227 (match_operand:DREG
1 "register_operand" "w,r"))
228 (set (match_operand:DREG2
2 "memory_operand" "=m,m")
229 (match_operand:DREG2
3 "register_operand" "w,r"))]
231 && rtx_equal_p (XEXP (operands[
2],
0),
232 plus_constant (Pmode,
233 XEXP (operands[
0],
0),
234 GET_MODE_SIZE (<DREG:MODE>mode)))"
238 [(set_attr "type" "neon_stp,store_16")]
241 (define_insn "aarch64_simd_stp<mode>"
242 [(set (match_operand:VP_2E
0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn")
243 (vec_duplicate:VP_2E (match_operand:<VEL>
1 "register_operand" "w,r")))]
246 stp
\\t%<Vetype>
1, %<Vetype>
1, %y0
247 stp
\\t%<vw>
1, %<vw>
1, %y0"
248 [(set_attr "type" "neon_stp, store_<ldpstp_vel_sz>")]
251 (define_insn "load_pair<VQ:mode><VQ2:mode>"
252 [(set (match_operand:VQ
0 "register_operand" "=w")
253 (match_operand:VQ
1 "aarch64_mem_pair_operand" "Ump"))
254 (set (match_operand:VQ2
2 "register_operand" "=w")
255 (match_operand:VQ2
3 "memory_operand" "m"))]
257 && rtx_equal_p (XEXP (operands[
3],
0),
258 plus_constant (Pmode,
259 XEXP (operands[
1],
0),
260 GET_MODE_SIZE (<VQ:MODE>mode)))"
261 "ldp
\\t%q0, %q2, %z1"
262 [(set_attr "type" "neon_ldp_q")]
265 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
266 [(set (match_operand:VQ
0 "aarch64_mem_pair_operand" "=Ump")
267 (match_operand:VQ
1 "register_operand" "w"))
268 (set (match_operand:VQ2
2 "memory_operand" "=m")
269 (match_operand:VQ2
3 "register_operand" "w"))]
271 && rtx_equal_p (XEXP (operands[
2],
0),
272 plus_constant (Pmode,
273 XEXP (operands[
0],
0),
274 GET_MODE_SIZE (<VQ:MODE>mode)))"
275 "stp
\\t%q1, %q3, %z0"
276 [(set_attr "type" "neon_stp_q")]
281 [(set (match_operand:VQMOV
0 "register_operand" "")
282 (match_operand:VQMOV
1 "register_operand" ""))]
285 && GP_REGNUM_P (REGNO (operands[
0]))
286 && GP_REGNUM_P (REGNO (operands[
1]))"
289 aarch64_simd_emit_reg_reg_move (operands, DImode,
2);
294 [(set (match_operand:VQMOV
0 "register_operand" "")
295 (match_operand:VQMOV
1 "register_operand" ""))]
298 && ((FP_REGNUM_P (REGNO (operands[
0])) && GP_REGNUM_P (REGNO (operands[
1])))
299 || (GP_REGNUM_P (REGNO (operands[
0])) && FP_REGNUM_P (REGNO (operands[
1]))))"
302 aarch64_split_simd_move (operands[
0], operands[
1]);
306 (define_expand "@aarch64_split_simd_mov<mode>"
307 [(set (match_operand:VQMOV
0)
308 (match_operand:VQMOV
1))]
311 rtx dst = operands[
0];
312 rtx src = operands[
1];
314 if (GP_REGNUM_P (REGNO (src)))
316 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
317 rtx src_high_part = gen_highpart (<VHALF>mode, src);
318 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
320 emit_move_insn (dst_low_part, src_low_part);
321 emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
326 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
327 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
328 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
329 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
330 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
331 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
337 (define_expand "aarch64_get_half<mode>"
338 [(set (match_operand:<VHALF>
0 "register_operand")
340 (match_operand:VQMOV
1 "register_operand")
341 (match_operand
2 "ascending_int_parallel")))]
344 if (vect_par_cnst_lo_half (operands[
2], <MODE>mode))
346 emit_move_insn (operands[
0], gen_lowpart (<VHALF>mode, operands[
1]));
352 (define_expand "aarch64_get_low<mode>"
353 [(match_operand:<VHALF>
0 "register_operand")
354 (match_operand:VQMOV
1 "register_operand")]
357 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
358 emit_insn (gen_aarch64_get_half<mode> (operands[
0], operands[
1], lo));
363 (define_expand "aarch64_get_high<mode>"
364 [(match_operand:<VHALF>
0 "register_operand")
365 (match_operand:VQMOV
1 "register_operand")]
368 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
369 emit_insn (gen_aarch64_get_half<mode> (operands[
0], operands[
1], hi));
374 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
375 [(set (match_operand:<VHALF>
0 "register_operand" "=w,?r")
377 (match_operand:VQMOV_NO2E
1 "register_operand" "w,w")
378 (match_operand:VQMOV_NO2E
2 "vect_par_cnst_lo_half" "")))]
383 "&& reload_completed && aarch64_simd_register (operands[
0], <VHALF>mode)"
384 [(set (match_dup
0) (match_dup
1))]
386 operands[
1] = aarch64_replace_reg_mode (operands[
1], <VHALF>mode);
388 [(set_attr "type" "mov_reg,neon_to_gp<q>")
389 (set_attr "length" "
4")]
392 (define_insn "aarch64_simd_mov_from_<mode>high"
393 [(set (match_operand:<VHALF>
0 "register_operand" "=w,?r,?r")
395 (match_operand:VQMOV_NO2E
1 "register_operand" "w,w,w")
396 (match_operand:VQMOV_NO2E
2 "vect_par_cnst_hi_half" "")))]
402 [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
403 (set_attr "arch" "simd,simd,*")
404 (set_attr "length" "
4")]
407 (define_insn "orn<mode>
3<vczle><vczbe>"
408 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
409 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w"))
410 (match_operand:VDQ_I
2 "register_operand" "w")))]
412 "orn
\t%
0.<Vbtype>, %
2.<Vbtype>, %
1.<Vbtype>"
413 [(set_attr "type" "neon_logic<q>")]
416 (define_insn "bic<mode>
3<vczle><vczbe>"
417 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
418 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w"))
419 (match_operand:VDQ_I
2 "register_operand" "w")))]
421 "bic
\t%
0.<Vbtype>, %
2.<Vbtype>, %
1.<Vbtype>"
422 [(set_attr "type" "neon_logic<q>")]
425 (define_insn "add<mode>
3<vczle><vczbe>"
426 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
427 (plus:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")
428 (match_operand:VDQ_I
2 "register_operand" "w")))]
430 "add
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
431 [(set_attr "type" "neon_add<q>")]
434 (define_insn "sub<mode>
3<vczle><vczbe>"
435 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
436 (minus:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")
437 (match_operand:VDQ_I
2 "register_operand" "w")))]
439 "sub
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
440 [(set_attr "type" "neon_sub<q>")]
443 (define_insn "mul<mode>
3<vczle><vczbe>"
444 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
445 (mult:VDQ_BHSI (match_operand:VDQ_BHSI
1 "register_operand" "w")
446 (match_operand:VDQ_BHSI
2 "register_operand" "w")))]
448 "mul
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
449 [(set_attr "type" "neon_mul_<Vetype><q>")]
452 ;; Advanced SIMD does not support vector DImode MUL, but SVE does.
453 ;; Make use of the overlap between Z and V registers to implement the V2DI
454 ;; optab for TARGET_SVE. The mulvnx2di3 expander can
455 ;; handle the TARGET_SVE2 case transparently.
456 (define_expand "mulv2di3"
457 [(set (match_operand:V2DI
0 "register_operand")
458 (mult:V2DI (match_operand:V2DI
1 "register_operand")
459 (match_operand:V2DI
2 "aarch64_sve_vsm_operand")))]
462 machine_mode sve_mode = VNx2DImode;
463 rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[
0], V2DImode,
0);
464 rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[
1], V2DImode,
0);
465 rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[
2], V2DImode,
0);
467 emit_insn (gen_mulvnx2di3 (sve_op0, sve_op1, sve_op2));
472 (define_insn "bswap<mode>
2"
473 [(set (match_operand:VDQHSD
0 "register_operand" "=w")
474 (bswap:VDQHSD (match_operand:VDQHSD
1 "register_operand" "w")))]
476 "rev<Vrevsuff>
\\t%
0.<Vbtype>, %
1.<Vbtype>"
477 [(set_attr "type" "neon_rev<q>")]
480 (define_insn "aarch64_rbit<mode><vczle><vczbe>"
481 [(set (match_operand:VB
0 "register_operand" "=w")
482 (unspec:VB [(match_operand:VB
1 "register_operand" "w")]
485 "rbit
\\t%
0.<Vbtype>, %
1.<Vbtype>"
486 [(set_attr "type" "neon_rbit")]
489 (define_expand "ctz<mode>
2"
490 [(set (match_operand:VS
0 "register_operand")
491 (ctz:VS (match_operand:VS
1 "register_operand")))]
494 emit_insn (gen_bswap<mode>
2 (operands[
0], operands[
1]));
495 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[
0],
497 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
498 emit_insn (gen_clz<mode>
2 (operands[
0], operands[
0]));
503 (define_expand "xorsign<mode>
3"
504 [(match_operand:VHSDF
0 "register_operand")
505 (match_operand:VHSDF
1 "register_operand")
506 (match_operand:VHSDF
2 "register_operand")]
510 machine_mode imode = <V_INT_EQUIV>mode;
511 rtx v_bitmask = gen_reg_rtx (imode);
512 rtx op1x = gen_reg_rtx (imode);
513 rtx op2x = gen_reg_rtx (imode);
515 rtx arg1 = lowpart_subreg (imode, operands[
1], <MODE>mode);
516 rtx arg2 = lowpart_subreg (imode, operands[
2], <MODE>mode);
518 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) -
1;
520 emit_move_insn (v_bitmask,
521 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
522 HOST_WIDE_INT_M1U << bits));
524 emit_insn (gen_and<v_int_equiv>
3 (op2x, v_bitmask, arg2));
525 emit_insn (gen_xor<v_int_equiv>
3 (op1x, arg1, op2x));
526 emit_move_insn (operands[
0],
527 lowpart_subreg (<MODE>mode, op1x, imode));
532 ;; The fcadd and fcmla patterns are made UNSPEC explicitly due to the
533 ;; fact that their usage needs to guarantee that the source vectors are
534 ;; contiguous. It would be wrong to describe the operation without being able
535 ;; to describe the permute that is also required, but even if that is done
536 ;; the permute would have been created as a LOAD_LANES which means the values
537 ;; in the registers are in the wrong order.
538 (define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
539 [(set (match_operand:VHSDF
0 "register_operand" "=w")
540 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand" "w")
541 (match_operand:VHSDF
2 "register_operand" "w")]
544 "fcadd
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>, #<rot>"
545 [(set_attr "type" "neon_fcadd")]
548 (define_expand "cadd<rot><mode>
3"
549 [(set (match_operand:VHSDF
0 "register_operand")
550 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand")
551 (match_operand:VHSDF
2 "register_operand")]
553 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
556 (define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
557 [(set (match_operand:VHSDF
0 "register_operand" "=w")
558 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF
2 "register_operand" "w")
559 (match_operand:VHSDF
3 "register_operand" "w")]
561 (match_operand:VHSDF
1 "register_operand" "
0")))]
563 "fcmla
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vtype>, #<rot>"
564 [(set_attr "type" "neon_fcmla")]
568 (define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
569 [(set (match_operand:VHSDF
0 "register_operand" "=w")
570 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF
2 "register_operand" "w")
571 (match_operand:VHSDF
3 "register_operand" "w")
572 (match_operand:SI
4 "const_int_operand" "n")]
574 (match_operand:VHSDF
1 "register_operand" "
0")))]
577 operands[
4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[
4]));
578 return "fcmla
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<FCMLA_maybe_lane>, #<rot>";
580 [(set_attr "type" "neon_fcmla")]
583 (define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
584 [(set (match_operand:V4HF
0 "register_operand" "=w")
585 (plus:V4HF (unspec:V4HF [(match_operand:V4HF
2 "register_operand" "w")
586 (match_operand:V8HF
3 "register_operand" "w")
587 (match_operand:SI
4 "const_int_operand" "n")]
589 (match_operand:V4HF
1 "register_operand" "
0")))]
592 operands[
4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
593 return "fcmla
\t%
0.4h, %
2.4h, %
3.h[%
4], #<rot>";
595 [(set_attr "type" "neon_fcmla")]
598 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
599 [(set (match_operand:VQ_HSF
0 "register_operand" "=w")
600 (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF
2 "register_operand" "w")
601 (match_operand:<VHALF>
3 "register_operand" "w")
602 (match_operand:SI
4 "const_int_operand" "n")]
604 (match_operand:VQ_HSF
1 "register_operand" "
0")))]
607 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
609 = gen_int_mode (ENDIAN_LANE_N (nunits /
2, INTVAL (operands[
4])), SImode);
610 return "fcmla
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<FCMLA_maybe_lane>, #<rot>";
612 [(set_attr "type" "neon_fcmla")]
615 ;; The complex mla/mls operations always need to expand to two instructions.
616 ;; The first operation does half the computation and the second does the
617 ;; remainder. Because of this, expand early.
618 (define_expand "cml<fcmac1><conj_op><mode>
4"
619 [(set (match_operand:VHSDF
0 "register_operand")
620 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand")
621 (match_operand:VHSDF
2 "register_operand")]
623 (match_operand:VHSDF
3 "register_operand")))]
624 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
626 rtx tmp = gen_reg_rtx (<MODE>mode);
627 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[
3],
628 operands[
2], operands[
1]));
629 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[
0], tmp,
630 operands[
2], operands[
1]));
634 ;; The complex mul operations always need to expand to two instructions.
635 ;; The first operation does half the computation and the second does the
636 ;; remainder. Because of this, expand early.
637 (define_expand "cmul<conj_op><mode>
3"
638 [(set (match_operand:VHSDF
0 "register_operand")
639 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand")
640 (match_operand:VHSDF
2 "register_operand")]
642 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
644 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
645 rtx res1 = gen_reg_rtx (<MODE>mode);
646 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
647 operands[
2], operands[
1]));
648 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[
0], res1,
649 operands[
2], operands[
1]));
653 ;; These expands map to the Dot Product optab the vectorizer checks for
654 ;; and to the intrinsics pattern.
655 ;; The auto-vectorizer expects a dot product builtin that also does an
656 ;; accumulation into the provided register.
657 ;; Given the following pattern
659 ;; for (i=0; i<len; i++) {
665 ;; This can be auto-vectorized to
666 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
668 ;; given enough iterations. However the vectorizer can keep unrolling the loop
669 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
670 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
673 ;; and so the vectorizer provides r, in which the result has to be accumulated.
674 (define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
675 [(set (match_operand:VS
0 "register_operand" "=w")
677 (unspec:VS [(match_operand:<VSI2QI>
1 "register_operand" "w")
678 (match_operand:<VSI2QI>
2 "register_operand" "w")]
680 (match_operand:VS
3 "register_operand" "
0")))]
682 "<sur>dot
\\t%
0.<Vtype>, %
1.<Vdottype>, %
2.<Vdottype>"
683 [(set_attr "type" "neon_dot<q>")]
686 ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
687 ;; (vector) Dot Product operation and the vectorized optab.
688 (define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
689 [(set (match_operand:VS
0 "register_operand" "=w")
691 (unspec:VS [(match_operand:<VSI2QI>
1 "register_operand" "w")
692 (match_operand:<VSI2QI>
2 "register_operand" "w")]
694 (match_operand:VS
3 "register_operand" "
0")))]
696 "usdot
\\t%
0.<Vtype>, %
1.<Vdottype>, %
2.<Vdottype>"
697 [(set_attr "type" "neon_dot<q>")]
700 ;; These instructions map to the __builtins for the Dot Product
701 ;; indexed operations.
702 (define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
703 [(set (match_operand:VS
0 "register_operand" "=w")
705 (unspec:VS [(match_operand:<VSI2QI>
2 "register_operand" "w")
706 (match_operand:V8QI
3 "register_operand" "<h_con>")
707 (match_operand:SI
4 "immediate_operand" "i")]
709 (match_operand:VS
1 "register_operand" "
0")))]
712 operands[
4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[
4]));
713 return "<sur>dot
\\t%
0.<Vtype>, %
2.<Vdottype>, %
3.4b[%
4]";
715 [(set_attr "type" "neon_dot<q>")]
718 (define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
719 [(set (match_operand:VS
0 "register_operand" "=w")
721 (unspec:VS [(match_operand:<VSI2QI>
2 "register_operand" "w")
722 (match_operand:V16QI
3 "register_operand" "<h_con>")
723 (match_operand:SI
4 "immediate_operand" "i")]
725 (match_operand:VS
1 "register_operand" "
0")))]
728 operands[
4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[
4]));
729 return "<sur>dot
\\t%
0.<Vtype>, %
2.<Vdottype>, %
3.4b[%
4]";
731 [(set_attr "type" "neon_dot<q>")]
734 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
735 ;; (by element) Dot Product operations.
736 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
737 [(set (match_operand:VS
0 "register_operand" "=w")
739 (unspec:VS [(match_operand:<VS:VSI2QI>
2 "register_operand" "w")
740 (match_operand:VB
3 "register_operand" "w")
741 (match_operand:SI
4 "immediate_operand" "i")]
743 (match_operand:VS
1 "register_operand" "
0")))]
746 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
747 int lane = INTVAL (operands[
4]);
748 operands[
4] = gen_int_mode (ENDIAN_LANE_N (nunits /
4, lane), SImode);
749 return "<DOTPROD_I8MM:sur>dot
\\t%
0.<VS:Vtype>, %
2.<VS:Vdottype>, %
3.4b[%
4]";
751 [(set_attr "type" "neon_dot<VS:q>")]
754 (define_expand "copysign<mode>
3"
755 [(match_operand:VHSDF
0 "register_operand")
756 (match_operand:VHSDF
1 "register_operand")
757 (match_operand:VHSDF
2 "register_operand")]
760 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
761 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) -
1;
763 emit_move_insn (v_bitmask,
764 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
765 HOST_WIDE_INT_M1U << bits));
766 emit_insn (gen_aarch64_simd_bsl<mode> (operands[
0], v_bitmask,
767 operands[
2], operands[
1]));
772 (define_insn "mul_lane<mode>
3"
773 [(set (match_operand:VMULD
0 "register_operand" "=w")
777 (match_operand:<VCOND>
2 "register_operand" "<h_con>")
778 (parallel [(match_operand:SI
3 "immediate_operand" "i")])))
779 (match_operand:VMULD
1 "register_operand" "w")))]
782 operands[
3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
3]));
783 return "<f>mul
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vetype>[%
3]";
785 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
788 (define_insn "mul_laneq<mode>
3"
789 [(set (match_operand:VMUL
0 "register_operand" "=w")
793 (match_operand:<VCONQ>
2 "register_operand" "<h_con>")
794 (parallel [(match_operand:SI
3 "immediate_operand")])))
795 (match_operand:VMUL
1 "register_operand" "w")))]
798 operands[
3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
3]));
799 return "<f>mul
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vetype>[%
3]";
801 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
804 (define_insn "mul_n<mode>
3"
805 [(set (match_operand:VMUL
0 "register_operand" "=w")
808 (match_operand:<VEL>
2 "register_operand" "<h_con>"))
809 (match_operand:VMUL
1 "register_operand" "w")))]
811 "<f>mul
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vetype>[
0]";
812 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
815 (define_insn "@aarch64_rsqrte<mode>"
816 [(set (match_operand:VHSDF_HSDF
0 "register_operand" "=w")
817 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF
1 "register_operand" "w")]
820 "frsqrte
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>"
821 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
823 (define_insn "@aarch64_rsqrts<mode>"
824 [(set (match_operand:VHSDF_HSDF
0 "register_operand" "=w")
825 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF
1 "register_operand" "w")
826 (match_operand:VHSDF_HSDF
2 "register_operand" "w")]
829 "frsqrts
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
830 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
832 (define_expand "rsqrt<mode>
2"
833 [(set (match_operand:VALLF
0 "register_operand")
834 (unspec:VALLF [(match_operand:VALLF
1 "register_operand")]
838 aarch64_emit_approx_sqrt (operands[
0], operands[
1], true);
842 (define_insn "aarch64_ursqrte<mode>"
843 [(set (match_operand:VDQ_SI
0 "register_operand" "=w")
844 (unspec:VDQ_SI [(match_operand:VDQ_SI
1 "register_operand" "w")]
847 "ursqrte
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>"
848 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
850 (define_insn "*aarch64_mul3_elt_to_64v2df"
851 [(set (match_operand:DF
0 "register_operand" "=w")
854 (match_operand:V2DF
1 "register_operand" "w")
855 (parallel [(match_operand:SI
2 "immediate_operand")]))
856 (match_operand:DF
3 "register_operand" "w")))]
859 operands[
2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[
2]));
860 return "fmul
\\t%
0.2d, %
3.2d, %
1.d[%
2]";
862 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
865 (define_insn "neg<mode>
2<vczle><vczbe>"
866 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
867 (neg:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")))]
869 "neg
\t%
0.<Vtype>, %
1.<Vtype>"
870 [(set_attr "type" "neon_neg<q>")]
873 (define_insn "abs<mode>
2<vczle><vczbe>"
874 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
875 (abs:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")))]
877 "abs
\t%
0.<Vtype>, %
1.<Vtype>"
878 [(set_attr "type" "neon_abs<q>")]
881 ;; The intrinsic version of integer ABS must not be allowed to
882 ;; combine with any operation with an integrated ABS step, such
;; as SABD.
884 (define_insn "aarch64_abs<mode><vczle><vczbe>"
885 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
887 [(match_operand:VSDQ_I_DI
1 "register_operand" "w")]
890 "abs
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>"
891 [(set_attr "type" "neon_abs<q>")]
894 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
895 ;; This isn't accurate as ABS treats always its input as a signed value.
896 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
897 ;; Whereas SABD would return 192 (-64 signed) on the above example.
898 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
899 (define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
900 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
903 (match_operand:VDQ_BHSI
1 "register_operand" "w")
904 (match_operand:VDQ_BHSI
2 "register_operand" "w"))
909 "<su>abd
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
910 [(set_attr "type" "neon_abd<q>")]
913 (define_expand "<su>abd<mode>
3"
914 [(match_operand:VDQ_BHSI
0 "register_operand")
916 (match_operand:VDQ_BHSI
1 "register_operand")
917 (match_operand:VDQ_BHSI
2 "register_operand"))]
920 emit_insn (gen_aarch64_<su>abd<mode> (operands[
0], operands[
1], operands[
2]));
925 (define_insn "aarch64_<su>abdl<mode>"
926 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
930 (match_operand:VD_BHSI
1 "register_operand" "w")
931 (match_operand:VD_BHSI
2 "register_operand" "w"))
936 "<su>abdl
\t%
0.<Vwtype>, %
1.<Vtype>, %
2.<Vtype>"
937 [(set_attr "type" "neon_abd<q>")]
940 (define_insn "aarch64_<su>abdl2<mode>_insn"
941 [(set (match_operand:<VDBLW>
0 "register_operand" "=w")
946 (match_operand:VQW
1 "register_operand" "w")
947 (match_operand:VQW
3 "vect_par_cnst_hi_half" ""))
949 (match_operand:VQW
2 "register_operand" "w")
960 "<su>abdl2
\t%
0.<Vwtype>, %
1.<Vtype>, %
2.<Vtype>"
961 [(set_attr "type" "neon_abd<q>")]
964 (define_expand "aarch64_<su>abdl2<mode>"
965 [(match_operand:<VDBLW>
0 "register_operand")
967 (match_operand:VQW
1 "register_operand")
968 (match_operand:VQW
2 "register_operand"))]
971 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
972 emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[
0], operands[
1],
978 (define_insn "aarch64_<su>abdl<mode>_hi_internal"
979 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
984 (match_operand:VQW
1 "register_operand" "w")
985 (match_operand:VQW
3 "vect_par_cnst_hi_half" "")))
988 (match_operand:VQW
2 "register_operand" "w")
991 "<su>abdl2
\t%
0.<Vwtype>, %
1.<Vtype>, %
2.<Vtype>"
992 [(set_attr "type" "neon_abd_long")]
995 (define_insn "aarch64_<su>abdl<mode>_lo_internal"
996 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
1001 (match_operand:VQW
1 "register_operand" "w")
1002 (match_operand:VQW
3 "vect_par_cnst_lo_half" "")))
1005 (match_operand:VQW
2 "register_operand" "w")
1008 "<su>abdl
\t%
0.<Vwtype>, %
1.<Vhalftype>, %
2.<Vhalftype>"
1009 [(set_attr "type" "neon_abd_long")]
1012 (define_expand "vec_widen_<su>abd_hi_<mode>"
1013 [(match_operand:<VWIDE>
0 "register_operand")
1014 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))
1015 (ANY_EXTEND:<VWIDE> (match_operand:VQW
2 "register_operand"))]
1018 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1019 emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[
0], operands[
1],
1025 (define_expand "vec_widen_<su>abd_lo_<mode>"
1026 [(match_operand:<VWIDE>
0 "register_operand")
1027 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))
1028 (ANY_EXTEND:<VWIDE> (match_operand:VQW
2 "register_operand"))]
1031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1032 emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[
0], operands[
1],
1038 (define_insn "aarch64_<su>abal<mode>"
1039 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
1041 (zero_extend:<VWIDE>
1044 (match_operand:VD_BHSI
2 "register_operand" "w")
1045 (match_operand:VD_BHSI
3 "register_operand" "w"))
1049 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
1051 "<su>abal
\t%
0.<Vwtype>, %
2.<Vtype>, %
3.<Vtype>"
1052 [(set_attr "type" "neon_arith_acc<q>")]
1055 (define_insn "aarch64_<su>abal2<mode>_insn"
1056 [(set (match_operand:<VDBLW>
0 "register_operand" "=w")
1058 (zero_extend:<VDBLW>
1062 (match_operand:VQW
2 "register_operand" "w")
1063 (match_operand:VQW
4 "vect_par_cnst_hi_half" ""))
1065 (match_operand:VQW
3 "register_operand" "w")
1074 (match_operand:<VDBLW>
1 "register_operand" "
0")))]
1076 "<su>abal2
\t%
0.<Vwtype>, %
2.<Vtype>, %
3.<Vtype>"
1077 [(set_attr "type" "neon_arith_acc<q>")]
1080 (define_expand "aarch64_<su>abal2<mode>"
1081 [(match_operand:<VDBLW>
0 "register_operand")
1082 (match_operand:<VDBLW>
1 "register_operand")
1084 (match_operand:VQW
2 "register_operand")
1085 (match_operand:VQW
3 "register_operand"))]
1088 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1089 emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[
0], operands[
1],
1090 operands[
2], operands[
3], hi));
1095 (define_expand "aarch64_<su>adalp<mode>"
1096 [(set (match_operand:<VDBLW>
0 "register_operand")
1100 (ANY_EXTEND:<V2XWIDE>
1101 (match_operand:VDQV_L
2 "register_operand"))
1103 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup
2))
1105 (match_operand:<VDBLW>
1 "register_operand")))]
1108 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () /
2;
1109 operands[
3] = aarch64_gen_stepped_int_parallel (nunits,
0,
2);
1110 operands[
4] = aarch64_gen_stepped_int_parallel (nunits,
1,
2);
1114 (define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
1115 [(set (match_operand:<VDBLW>
0 "register_operand" "=w")
1119 (ANY_EXTEND:<V2XWIDE>
1120 (match_operand:VDQV_L
2 "register_operand" "w"))
1121 (match_operand:<V2XWIDE>
3 "vect_par_cnst_even_or_odd_half" ""))
1122 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup
2))
1123 (match_operand:<V2XWIDE>
4 "vect_par_cnst_even_or_odd_half" "")))
1124 (match_operand:<VDBLW>
1 "register_operand" "
0")))]
1126 && !rtx_equal_p (operands[
3], operands[
4])"
1127 "<su>adalp
\t%
0.<Vwhalf>, %
2.<Vtype>"
1128 [(set_attr "type" "neon_reduc_add<q>")]
1131 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
1132 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
1133 ;; reduction of the difference into a V4SI vector and accumulate that into
1134 ;; operand 3 before copying that into the result operand 0.
1135 ;; Perform that with a sequence of:
1136 ;; UABDL2 tmp.8h, op1.16b, op2.16b
1137 ;; UABAL tmp.8h, op1.8b, op2.8b
1138 ;; UADALP op3.4s, tmp.8h
1139 ;; MOV op0, op3 // should be eliminated in later passes.

1141 ;; For TARGET_DOTPROD we do:
1142 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
1143 ;; UABD tmp2.16b, op1.16b, op2.16b
1144 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
1145 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.

1147 ;; The signed version just uses the signed variants of the above instructions
1148 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
1151 (define_expand "<su>sadv16qi"
1152 [(use (match_operand:V4SI
0 "register_operand"))
1153 (USMAX:V16QI (match_operand:V16QI
1 "register_operand")
1154 (match_operand:V16QI
2 "register_operand"))
1155 (use (match_operand:V4SI
3 "register_operand"))]
1160 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
1161 rtx abd = gen_reg_rtx (V16QImode);
1162 emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[
1], operands[
2]));
1163 emit_insn (gen_udot_prodv16qi (operands[
0], abd, ones, operands[
3]));
1166 rtx reduc = gen_reg_rtx (V8HImode);
1167 emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[
1],
1169 emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
1170 gen_lowpart (V8QImode, operands[
1]),
1171 gen_lowpart (V8QImode,
1173 emit_insn (gen_aarch64_<su>adalpv8hi (operands[
3], operands[
3], reduc));
1174 emit_move_insn (operands[
0], operands[
3]);
1179 (define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
1180 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
1181 (plus:VDQ_BHSI (minus:VDQ_BHSI
1183 (match_operand:VDQ_BHSI
2 "register_operand" "w")
1184 (match_operand:VDQ_BHSI
3 "register_operand" "w"))
1188 (match_operand:VDQ_BHSI
1 "register_operand" "
0")))]
1190 "<su>aba
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vtype>"
1191 [(set_attr "type" "neon_arith_acc<q>")]
1194 (define_insn "fabd<mode>
3<vczle><vczbe>"
1195 [(set (match_operand:VHSDF_HSDF
0 "register_operand" "=w")
1198 (match_operand:VHSDF_HSDF
1 "register_operand" "w")
1199 (match_operand:VHSDF_HSDF
2 "register_operand" "w"))))]
1201 "fabd
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
1202 [(set_attr "type" "neon_fp_abd_<stype><q>")]
1205 ;; For AND (vector, register) and BIC (vector, immediate)
1206 (define_insn "and<mode>
3<vczle><vczbe>"
1207 [(set (match_operand:VDQ_I
0 "register_operand" "=w,w")
1208 (and:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w,
0")
1209 (match_operand:VDQ_I
2 "aarch64_reg_or_bic_imm" "w,Db")))]
1212 and
\t%
0.<Vbtype>, %
1.<Vbtype>, %
2.<Vbtype>
1213 * return aarch64_output_simd_mov_immediate (operands[
2], <bitsize>,\
1214 AARCH64_CHECK_BIC);"
1215 [(set_attr "type" "neon_logic<q>")]
1218 ;; For ORR (vector, register) and ORR (vector, immediate)
1219 (define_insn "ior<mode>
3<vczle><vczbe>"
1220 [(set (match_operand:VDQ_I
0 "register_operand" "=w,w")
1221 (ior:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w,
0")
1222 (match_operand:VDQ_I
2 "aarch64_reg_or_orr_imm" "w,Do")))]
1225 orr
\t%
0.<Vbtype>, %
1.<Vbtype>, %
2.<Vbtype>
1226 * return aarch64_output_simd_mov_immediate (operands[
2], <bitsize>,\
1227 AARCH64_CHECK_ORR);"
1228 [(set_attr "type" "neon_logic<q>")]
1231 (define_insn "xor<mode>
3<vczle><vczbe>"
1232 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1233 (xor:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")
1234 (match_operand:VDQ_I
2 "register_operand" "w")))]
1236 "eor
\t%
0.<Vbtype>, %
1.<Vbtype>, %
2.<Vbtype>"
1237 [(set_attr "type" "neon_logic<q>")]
1240 (define_insn "one_cmpl<mode>
2<vczle><vczbe>"
1241 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1242 (not:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")))]
1244 "not
\t%
0.<Vbtype>, %
1.<Vbtype>"
1245 [(set_attr "type" "neon_logic<q>")]
1248 (define_insn "aarch64_simd_vec_set<mode>"
1249 [(set (match_operand:VALL_F16
0 "register_operand" "=w,w,w")
1251 (vec_duplicate:VALL_F16
1252 (match_operand:<VEL>
1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
1253 (match_operand:VALL_F16
3 "register_operand" "
0,
0,
0")
1254 (match_operand:SI
2 "immediate_operand" "i,i,i")))]
1255 "TARGET_SIMD && exact_log2 (INTVAL (operands[
2])) >=
0"
1257 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[
2])));
1258 operands[
2] = GEN_INT ((HOST_WIDE_INT)
1 << elt);
1259 switch (which_alternative)
1262 return "ins
\\t%
0.<Vetype>[%p2], %
1.<Vetype>[
0]";
1264 return "ins
\\t%
0.<Vetype>[%p2], %<vwcore>
1";
1266 return "ld1
\\t{%
0.<Vetype>}[%p2], %
1";
1271 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1274 (define_insn "aarch64_simd_vec_set_zero<mode>"
1275 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
1277 (match_operand:VALL_F16
1 "aarch64_simd_imm_zero" "")
1278 (match_operand:VALL_F16
3 "register_operand" "
0")
1279 (match_operand:SI
2 "immediate_operand" "i")))]
1280 "TARGET_SIMD && exact_log2 (INTVAL (operands[
2])) >=
0"
1282 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[
2])));
1283 operands[
2] = GEN_INT ((HOST_WIDE_INT)
1 << elt);
1284 return "ins
\\t%
0.<Vetype>[%p2], <vwcore>zr";
1288 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1289 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
1291 (vec_duplicate:VALL_F16
1293 (match_operand:VALL_F16
3 "register_operand" "w")
1295 [(match_operand:SI
4 "immediate_operand" "i")])))
1296 (match_operand:VALL_F16
1 "register_operand" "
0")
1297 (match_operand:SI
2 "immediate_operand" "i")))]
1298 "TARGET_SIMD && exact_log2 (INTVAL (operands[
2])) >=
0"
1300 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[
2])));
1301 operands[
2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1302 operands[
4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
4]));
1304 return "ins
\t%
0.<Vetype>[%p2], %
3.<Vetype>[%
4]";
1306 [(set_attr "type" "neon_ins<q>")]
1309 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1310 [(set (match_operand:VALL_F16_NO_V2Q
0 "register_operand" "=w")
1311 (vec_merge:VALL_F16_NO_V2Q
1312 (vec_duplicate:VALL_F16_NO_V2Q
1314 (match_operand:<VSWAP_WIDTH>
3 "register_operand" "w")
1316 [(match_operand:SI
4 "immediate_operand" "i")])))
1317 (match_operand:VALL_F16_NO_V2Q
1 "register_operand" "
0")
1318 (match_operand:SI
2 "immediate_operand" "i")))]
1319 "TARGET_SIMD && exact_log2 (INTVAL (operands[
2])) >=
0"
1321 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[
2])));
1322 operands[
2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1323 operands[
4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1324 INTVAL (operands[
4]));
1326 return "ins
\t%
0.<Vetype>[%p2], %
3.<Vetype>[%
4]";
1328 [(set_attr "type" "neon_ins<q>")]
1331 (define_expand "signbit<mode>
2"
1332 [(use (match_operand:<V_INT_EQUIV>
0 "register_operand"))
1333 (use (match_operand:VDQSF
1 "register_operand"))]
1336 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) -
1;
1337 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1339 operands[
1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[
1], <MODE>mode);
1341 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[
0], operands[
1],
1346 (define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
1347 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1348 (lshiftrt:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")
1349 (match_operand:VDQ_I
2 "aarch64_simd_rshift_imm" "Dr")))]
1351 "ushr
\t%
0.<Vtype>, %
1.<Vtype>, %
2"
1352 [(set_attr "type" "neon_shift_imm<q>")]
1355 (define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
1356 [(set (match_operand:VDQ_I
0 "register_operand" "=w,w")
1357 (ashiftrt:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w,w")
1358 (match_operand:VDQ_I
2 "aarch64_simd_rshift_imm" "D1,Dr")))]
1361 cmlt
\t%
0.<Vtype>, %
1.<Vtype>, #
0
1362 sshr
\t%
0.<Vtype>, %
1.<Vtype>, %
2"
1363 [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
1366 (define_insn "aarch64_<sra_op>sra_n<mode>_insn"
1367 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1370 (match_operand:VDQ_I
2 "register_operand" "w")
1371 (match_operand:VDQ_I
3 "aarch64_simd_rshift_imm"))
1372 (match_operand:VDQ_I
1 "register_operand" "
0")))]
1374 "<sra_op>sra
\t%<v>
0<Vmtype>, %<v>
2<Vmtype>, %
3"
1375 [(set_attr "type" "neon_shift_acc<q>")]
1378 (define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
1379 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
1384 (<SHIFTEXTEND>:<V2XWIDE>
1385 (match_operand:VSDQ_I_DI
2 "register_operand" "w"))
1386 (match_operand:<V2XWIDE>
4 "aarch64_int_rnd_operand"))
1387 (match_operand:VSDQ_I_DI
3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
1388 (match_operand:VSDQ_I_DI
1 "register_operand" "
0")))]
1390 && aarch64_const_vec_rnd_cst_p (operands[
4], operands[
3])"
1391 "<sra_op>rsra
\\t%<v>
0<Vmtype>, %<v>
2<Vmtype>, %
3"
1392 [(set_attr "type" "neon_shift_acc<q>")]
1395 (define_expand "aarch64_<sra_op>sra_n<mode>"
1396 [(set (match_operand:VDQ_I
0 "register_operand")
1399 (match_operand:VDQ_I
2 "register_operand")
1400 (match_operand:SI
3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
1401 (match_operand:VDQ_I
1 "register_operand")))]
1405 = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[
3]));
1409 (define_expand "aarch64_<sra_op>rsra_n<mode>"
1410 [(match_operand:VSDQ_I_DI
0 "register_operand")
1411 (match_operand:VSDQ_I_DI
1 "register_operand")
1413 (match_operand:VSDQ_I_DI
2 "register_operand")
1414 (match_operand:SI
3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
1417 /* Use this expander to create the rounding constant vector, which is
1418 1 << (shift -
1). Use wide_int here to ensure that the right TImode
1419 RTL is generated when handling the DImode expanders. */
1420 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
1421 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
3]) -
1, prec);
1422 rtx shft = gen_int_mode (INTVAL (operands[
3]), DImode);
1423 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
1424 if (VECTOR_MODE_P (<MODE>mode))
1426 shft = gen_const_vec_duplicate (<MODE>mode, shft);
1427 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
1430 emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[
0], operands[
1],
1431 operands[
2], shft, rnd));
1436 (define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
1437 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1438 (ashift:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")
1439 (match_operand:VDQ_I
2 "aarch64_simd_lshift_imm" "Dl")))]
1441 "shl
\t%
0.<Vtype>, %
1.<Vtype>, %
2"
1442 [(set_attr "type" "neon_shift_imm<q>")]
1445 (define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
1446 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1447 (ashift:VDQ_I (match_operand:VDQ_I
1 "register_operand" "w")
1448 (match_operand:VDQ_I
2 "register_operand" "w")))]
1450 "sshl
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
1451 [(set_attr "type" "neon_shift_reg<q>")]
1454 (define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
1455 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1456 (unspec:VDQ_I [(match_operand:VDQ_I
1 "register_operand" "w")
1457 (match_operand:VDQ_I
2 "register_operand" "w")]
1458 UNSPEC_ASHIFT_UNSIGNED))]
1460 "ushl
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
1461 [(set_attr "type" "neon_shift_reg<q>")]
1464 (define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
1465 [(set (match_operand:VDQ_I
0 "register_operand" "=w")
1466 (unspec:VDQ_I [(match_operand:VDQ_I
1 "register_operand" "w")
1467 (match_operand:VDQ_I
2 "register_operand" "w")]
1468 UNSPEC_ASHIFT_SIGNED))]
1470 "sshl
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
1471 [(set_attr "type" "neon_shift_reg<q>")]
1474 (define_expand "ashl<mode>
3"
1475 [(match_operand:VDQ_I
0 "register_operand")
1476 (match_operand:VDQ_I
1 "register_operand")
1477 (match_operand:SI
2 "general_operand")]
1480 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1483 if (CONST_INT_P (operands[
2]))
1485 shift_amount = INTVAL (operands[
2]);
1486 if (shift_amount >=
0 && shift_amount < bit_width)
1488 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1490 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[
0],
1497 operands[
2] = force_reg (SImode, operands[
2]);
1499 rtx tmp = gen_reg_rtx (<MODE>mode);
1500 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1503 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[
0], operands[
1], tmp));
1507 (define_expand "lshr<mode>
3"
1508 [(match_operand:VDQ_I
0 "register_operand")
1509 (match_operand:VDQ_I
1 "register_operand")
1510 (match_operand:SI
2 "general_operand")]
1513 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1516 if (CONST_INT_P (operands[
2]))
1518 shift_amount = INTVAL (operands[
2]);
1519 if (shift_amount >
0 && shift_amount <= bit_width)
1521 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1523 emit_insn (gen_aarch64_simd_lshr<mode> (operands[
0],
1530 operands[
2] = force_reg (SImode, operands[
2]);
1532 rtx tmp = gen_reg_rtx (SImode);
1533 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1534 emit_insn (gen_negsi2 (tmp, operands[
2]));
1535 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1536 convert_to_mode (<VEL>mode, tmp,
0)));
1537 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[
0], operands[
1],
1542 (define_expand "ashr<mode>
3"
1543 [(match_operand:VDQ_I
0 "register_operand")
1544 (match_operand:VDQ_I
1 "register_operand")
1545 (match_operand:SI
2 "general_operand")]
1548 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1551 if (CONST_INT_P (operands[
2]))
1553 shift_amount = INTVAL (operands[
2]);
1554 if (shift_amount >
0 && shift_amount <= bit_width)
1556 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1558 emit_insn (gen_aarch64_simd_ashr<mode> (operands[
0],
1565 operands[
2] = force_reg (SImode, operands[
2]);
1567 rtx tmp = gen_reg_rtx (SImode);
1568 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1569 emit_insn (gen_negsi2 (tmp, operands[
2]));
1570 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1572 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[
0], operands[
1],
1577 (define_expand "vashl<mode>
3"
1578 [(match_operand:VDQ_I
0 "register_operand")
1579 (match_operand:VDQ_I
1 "register_operand")
1580 (match_operand:VDQ_I
2 "register_operand")]
1583 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[
0], operands[
1],
1588 (define_expand "vashr<mode>
3"
1589 [(match_operand:VDQ_I
0 "register_operand")
1590 (match_operand:VDQ_I
1 "register_operand")
1591 (match_operand:VDQ_I
2 "register_operand")]
1594 rtx neg = gen_reg_rtx (<MODE>mode);
1595 emit (gen_neg<mode>
2 (neg, operands[
2]));
1596 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[
0], operands[
1],
1602 (define_expand "aarch64_ashr_simddi"
1603 [(match_operand:DI
0 "register_operand")
1604 (match_operand:DI
1 "register_operand")
1605 (match_operand:SI
2 "aarch64_shift_imm64_di")]
1608 /* An arithmetic shift right by
64 fills the result with copies of the sign
1609 bit, just like asr by
63 - however the standard pattern does not handle
1611 if (INTVAL (operands[
2]) ==
64)
1612 operands[
2] = GEN_INT (
63);
1613 emit_insn (gen_ashrdi3 (operands[
0], operands[
1], operands[
2]));
1618 (define_expand "vlshr<mode>
3"
1619 [(match_operand:VDQ_I
0 "register_operand")
1620 (match_operand:VDQ_I
1 "register_operand")
1621 (match_operand:VDQ_I
2 "register_operand")]
1624 rtx neg = gen_reg_rtx (<MODE>mode);
1625 emit (gen_neg<mode>
2 (neg, operands[
2]));
1626 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[
0], operands[
1],
1631 (define_expand "aarch64_lshr_simddi"
1632 [(match_operand:DI
0 "register_operand")
1633 (match_operand:DI
1 "register_operand")
1634 (match_operand:SI
2 "aarch64_shift_imm64_di")]
1637 if (INTVAL (operands[
2]) ==
64)
1638 emit_move_insn (operands[
0], const0_rtx);
1640 emit_insn (gen_lshrdi3 (operands[
0], operands[
1], operands[
2]));
1645 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1646 (define_insn "vec_shr_<mode><vczle><vczbe>"
1647 [(set (match_operand:VD
0 "register_operand" "=w")
1648 (unspec:VD [(match_operand:VD
1 "register_operand" "w")
1649 (match_operand:SI
2 "immediate_operand" "i")]
1653 if (BYTES_BIG_ENDIAN)
1654 return "shl %d0, %d1, %
2";
1656 return "ushr %d0, %d1, %
2";
1658 [(set_attr "type" "neon_shift_imm")]
1661 (define_expand "vec_set<mode>"
1662 [(match_operand:VALL_F16
0 "register_operand")
1663 (match_operand:<VEL>
1 "aarch64_simd_nonimmediate_operand")
1664 (match_operand:SI
2 "immediate_operand")]
1667 HOST_WIDE_INT elem = (HOST_WIDE_INT)
1 << INTVAL (operands[
2]);
1668 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[
0], operands[
1],
1669 GEN_INT (elem), operands[
0]));
1675 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1676 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
1677 (plus:VDQ_BHSI (mult:VDQ_BHSI
1678 (match_operand:VDQ_BHSI
2 "register_operand" "w")
1679 (match_operand:VDQ_BHSI
3 "register_operand" "w"))
1680 (match_operand:VDQ_BHSI
1 "register_operand" "
0")))]
1682 "mla
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vtype>"
1683 [(set_attr "type" "neon_mla_<Vetype><q>")]
1686 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1687 [(set (match_operand:VDQHS
0 "register_operand" "=w")
1690 (vec_duplicate:VDQHS
1692 (match_operand:VDQHS
1 "register_operand" "<h_con>")
1693 (parallel [(match_operand:SI
2 "immediate_operand")])))
1694 (match_operand:VDQHS
3 "register_operand" "w"))
1695 (match_operand:VDQHS
4 "register_operand" "
0")))]
1698 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
1699 return "mla
\t%
0.<Vtype>, %
3.<Vtype>, %
1.<Vetype>[%
2]";
1701 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1704 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1705 [(set (match_operand:VDQHS
0 "register_operand" "=w")
1708 (vec_duplicate:VDQHS
1710 (match_operand:<VSWAP_WIDTH>
1 "register_operand" "<h_con>")
1711 (parallel [(match_operand:SI
2 "immediate_operand")])))
1712 (match_operand:VDQHS
3 "register_operand" "w"))
1713 (match_operand:VDQHS
4 "register_operand" "
0")))]
1716 operands[
2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[
2]));
1717 return "mla
\t%
0.<Vtype>, %
3.<Vtype>, %
1.<Vetype>[%
2]";
1719 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1722 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1723 [(set (match_operand:VDQHS
0 "register_operand" "=w")
1726 (vec_duplicate:VDQHS
1727 (match_operand:<VEL>
3 "register_operand" "<h_con>"))
1728 (match_operand:VDQHS
2 "register_operand" "w"))
1729 (match_operand:VDQHS
1 "register_operand" "
0")))]
1731 "mla
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vetype>[
0]"
1732 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1735 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1736 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
1737 (minus:VDQ_BHSI (match_operand:VDQ_BHSI
1 "register_operand" "
0")
1738 (mult:VDQ_BHSI (match_operand:VDQ_BHSI
2 "register_operand" "w")
1739 (match_operand:VDQ_BHSI
3 "register_operand" "w"))))]
1741 "mls
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vtype>"
1742 [(set_attr "type" "neon_mla_<Vetype><q>")]
1745 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1746 [(set (match_operand:VDQHS
0 "register_operand" "=w")
1748 (match_operand:VDQHS
4 "register_operand" "
0")
1750 (vec_duplicate:VDQHS
1752 (match_operand:VDQHS
1 "register_operand" "<h_con>")
1753 (parallel [(match_operand:SI
2 "immediate_operand")])))
1754 (match_operand:VDQHS
3 "register_operand" "w"))))]
1757 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
1758 return "mls
\t%
0.<Vtype>, %
3.<Vtype>, %
1.<Vetype>[%
2]";
1760 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1763 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1764 [(set (match_operand:VDQHS
0 "register_operand" "=w")
1766 (match_operand:VDQHS
4 "register_operand" "
0")
1768 (vec_duplicate:VDQHS
1770 (match_operand:<VSWAP_WIDTH>
1 "register_operand" "<h_con>")
1771 (parallel [(match_operand:SI
2 "immediate_operand")])))
1772 (match_operand:VDQHS
3 "register_operand" "w"))))]
1775 operands[
2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[
2]));
1776 return "mls
\t%
0.<Vtype>, %
3.<Vtype>, %
1.<Vetype>[%
2]";
1778 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1781 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1782 [(set (match_operand:VDQHS
0 "register_operand" "=w")
1784 (match_operand:VDQHS
1 "register_operand" "
0")
1786 (vec_duplicate:VDQHS
1787 (match_operand:<VEL>
3 "register_operand" "<h_con>"))
1788 (match_operand:VDQHS
2 "register_operand" "w"))))]
1790 "mls
\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vetype>[
0]"
1791 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1794 ;; Max/Min operations.
1795 (define_insn "<su><maxmin><mode>
3<vczle><vczbe>"
1796 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
1797 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI
1 "register_operand" "w")
1798 (match_operand:VDQ_BHSI
2 "register_operand" "w")))]
1800 "<su><maxmin>
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
1801 [(set_attr "type" "neon_minmax<q>")]
1804 (define_expand "<su><maxmin>v2di3"
1805 [(set (match_operand:V2DI
0 "register_operand")
1806 (MAXMIN:V2DI (match_operand:V2DI
1 "register_operand")
1807 (match_operand:V2DI
2 "register_operand")))]
1810 enum rtx_code cmp_operator;
1831 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[
1], operands[
2]);
1832 emit_insn (gen_vcondv2div2di (operands[
0], operands[
1],
1833 operands[
2], cmp_fmt, operands[
1], operands[
2]));
1837 ;; Pairwise Integer Max/Min operations.
1838 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1839 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
1840 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI
1 "register_operand" "w")
1841 (match_operand:VDQ_BHSI
2 "register_operand" "w")]
1844 "<maxmin_uns_op>p
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
1845 [(set_attr "type" "neon_minmax<q>")]
1848 ;; Pairwise FP Max/Min operations.
1849 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1850 [(set (match_operand:VHSDF
0 "register_operand" "=w")
1851 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand" "w")
1852 (match_operand:VHSDF
2 "register_operand" "w")]
1855 "<maxmin_uns_op>p
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
1856 [(set_attr "type" "neon_minmax<q>")]
1859 ;; vec_concat gives a new vector with the low elements from operand 1, and
1860 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1861 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1862 ;; What that means, is that the RTL descriptions of the below patterns
1863 ;; need to change depending on endianness.
1865 ;; Narrowing operations.
1867 (define_insn "aarch64_xtn2<mode>_insn_le"
1868 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
1869 (vec_concat:<VNARROWQ2>
1870 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
1871 (truncate:<VNARROWQ> (match_operand:VQN
2 "register_operand" "w"))))]
1872 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1873 "xtn2
\t%
0.<V2ntype>, %
2.<Vtype>"
1874 [(set_attr "type" "neon_move_narrow_q")]
1877 (define_insn "aarch64_xtn2<mode>_insn_be"
1878 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
1879 (vec_concat:<VNARROWQ2>
1880 (truncate:<VNARROWQ> (match_operand:VQN
2 "register_operand" "w"))
1881 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
1882 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1883 "xtn2
\t%
0.<V2ntype>, %
2.<Vtype>"
1884 [(set_attr "type" "neon_move_narrow_q")]
1887 (define_expand "aarch64_xtn2<mode>"
1888 [(match_operand:<VNARROWQ2>
0 "register_operand")
1889 (match_operand:<VNARROWQ>
1 "register_operand")
1890 (truncate:<VNARROWQ> (match_operand:VQN
2 "register_operand"))]
1893 if (BYTES_BIG_ENDIAN)
1894 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[
0], operands[
1],
1897 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[
0], operands[
1],
1903 (define_insn "*aarch64_narrow_trunc<mode>"
1904 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
1905 (vec_concat:<VNARROWQ2>
1906 (truncate:<VNARROWQ>
1907 (match_operand:VQN
1 "register_operand" "w"))
1908 (truncate:<VNARROWQ>
1909 (match_operand:VQN
2 "register_operand" "w"))))]
1912 if (!BYTES_BIG_ENDIAN)
1913 return "uzp1
\\t%
0.<V2ntype>, %
1.<V2ntype>, %
2.<V2ntype>";
1915 return "uzp1
\\t%
0.<V2ntype>, %
2.<V2ntype>, %
1.<V2ntype>";
1917 [(set_attr "type" "neon_permute<q>")]
1922 (define_expand "vec_pack_trunc_<mode>"
1923 [(match_operand:<VNARROWD>
0 "register_operand")
1924 (match_operand:VDN
1 "general_operand")
1925 (match_operand:VDN
2 "general_operand")]
1928 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1929 emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[
1], operands[
2]));
1930 emit_insn (gen_trunc<Vdbl><Vnarrowd>
2 (operands[
0], tempreg));
1936 (define_expand "vec_pack_trunc_<mode>"
1937 [(set (match_operand:<VNARROWQ2>
0 "register_operand")
1938 (vec_concat:<VNARROWQ2>
1939 (truncate:<VNARROWQ> (match_operand:VQN
1 "register_operand"))
1940 (truncate:<VNARROWQ> (match_operand:VQN
2 "register_operand"))))]
1943 rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1944 int lo = BYTES_BIG_ENDIAN ?
2 :
1;
1945 int hi = BYTES_BIG_ENDIAN ?
1 :
2;
1947 emit_insn (gen_trunc<mode><Vnarrowq>
2 (tmpreg, operands[lo]));
1949 if (BYTES_BIG_ENDIAN)
1950 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[
0], tmpreg,
1953 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[
0], tmpreg,
1959 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1960 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
1961 (vec_concat:<VNARROWQ2>
1962 (truncate:<VNARROWQ>
1963 (SHIFTRT:VQN (match_operand:VQN
1 "register_operand" "w")
1964 (match_operand:VQN
2 "aarch64_simd_shift_imm_vec_exact_top")))
1965 (truncate:<VNARROWQ>
1966 (SHIFTRT:VQN (match_operand:VQN
3 "register_operand" "w")
1968 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1969 "uzp2
\\t%
0.<V2ntype>, %
1.<V2ntype>, %
3.<V2ntype>"
1970 [(set_attr "type" "neon_permute<q>")]
1973 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1974 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
1975 (vec_concat:<VNARROWQ2>
1976 (truncate:<VNARROWQ>
1977 (SHIFTRT:VQN (match_operand:VQN
3 "register_operand" "w")
1978 (match_operand:VQN
2 "aarch64_simd_shift_imm_vec_exact_top")))
1979 (truncate:<VNARROWQ>
1980 (SHIFTRT:VQN (match_operand:VQN
1 "register_operand" "w")
1982 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1983 "uzp2
\\t%
0.<V2ntype>, %
1.<V2ntype>, %
3.<V2ntype>"
1984 [(set_attr "type" "neon_permute<q>")]
1987 ;; Widening operations.
1989 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1990 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
1991 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1992 (match_operand:VQW
1 "register_operand" "w")
1993 (match_operand:VQW
2 "vect_par_cnst_lo_half" "")
1996 "<su>xtl
\t%
0.<Vwtype>, %
1.<Vhalftype>"
1997 [(set_attr "type" "neon_shift_imm_long")]
2000 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
2001 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
2002 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2003 (match_operand:VQW
1 "register_operand" "w")
2004 (match_operand:VQW
2 "vect_par_cnst_hi_half" "")
2007 "<su>xtl2
\t%
0.<Vwtype>, %
1.<Vtype>"
2008 [(set_attr "type" "neon_shift_imm_long")]
2011 (define_expand "vec_unpack<su>_hi_<mode>"
2012 [(match_operand:<VWIDE>
0 "register_operand")
2013 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))]
2016 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2017 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[
0],
2023 (define_expand "vec_unpack<su>_lo_<mode>"
2024 [(match_operand:<VWIDE>
0 "register_operand")
2025 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))]
2028 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2029 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[
0],
;; Widening arithmetic.

;; Multiply the low halves of two Q registers, widening the products, and
;; accumulate into operand 1 (tied to the destination): SMLAL/UMLAL.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; High-half form: SMLAL2/UMLAL2 over the top lanes selected by operand 3.
(define_insn "aarch64_<su>mlal_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Expander that supplies the hi-half lane PARALLEL for the insn above.
(define_expand "aarch64_<su>mlal_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
}
)
;; SMLAL2/UMLAL2 by a duplicated scalar (element 0 of operand 4):
;; multiply the high half of operand 2 by the broadcast scalar, accumulate
;; into operand 1 (tied to the destination).
(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQ_HSI 2 "register_operand" "w")
		 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	      (vec_duplicate:<VWIDE>
		(ANY_EXTEND:<VWIDE_S>
		  (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the _n insn above.
(define_expand "aarch64_<su>mlal_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3]));
  DONE;
}
)
;; Widening multiply-subtract, low half: SMLSL/UMLSL.  Operand 1 (the
;; accumulator, tied to the destination) minus the widened product of the
;; low halves of operands 2 and 4.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; High-half form: SMLSL2/UMLSL2.
(define_insn "aarch64_<su>mlsl_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 2 "register_operand" "w")
		 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQW 4 "register_operand" "w")
		 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the insn above.
(define_expand "aarch64_<su>mlsl_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
}
)
;; SMLSL2/UMLSL2 by a duplicated scalar (element 0 of operand 4): subtract
;; the widened (hi-half x broadcast-scalar) product from accumulator
;; operand 1 (tied to the destination).
(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
		 (match_operand:VQ_HSI 2 "register_operand" "w")
		 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	      (vec_duplicate:<VWIDE>
		(ANY_EXTEND:<VWIDE_S>
		  (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the _n insn above.
(define_expand "aarch64_<su>mlsl_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3]));
  DONE;
}
)
;; 64-bit (D-register) widening multiply-accumulate: SMLAL/UMLAL on whole
;; D registers; the accumulator (operand 1) is tied to the destination.
(define_insn "aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 3 "register_operand" "w")))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; As above but the second multiplicand is a broadcast scalar (element 0).
(define_insn "aarch64_<su>mlal_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_HSI 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; 64-bit widening multiply-subtract: SMLSL/UMLSL.
(define_insn "aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; SMLSL/UMLSL by a broadcast scalar (element 0 of operand 3).
(define_insn "aarch64_<su>mlsl_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_HSI 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply of the low halves of two Q registers: SMULL/UMULL.
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Intrinsic form: widening multiply of two whole D registers.
(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE>
			(match_operand:VD_BHSI 1 "register_operand" "w"))
		      (ANY_EXTEND:<VWIDE>
			(match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Standard-name expander for the low-half widening multiply.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
							operands[1],
							operands[2], p));
    DONE;
  }
)

;; High-half widening multiply: SMULL2/UMULL2.
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Standard-name expander for the high-half widening multiply.
(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
							operands[1],
							operands[2], p));
    DONE;
  }
)
;; vmull_lane_s16 intrinsics.

;; Widening multiply by a selected lane of operand 2; the lane index is
;; remapped for endianness before emitting SMULL/UMULL.
(define_insn "aarch64_vec_<su>mult_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:<VCOND> 1 "register_operand" "w"))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:VDQHS 2 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

;; High-half widening multiply by a selected lane: SMULL2/UMULL2.
(define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:<VCOND> 3 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the insn above.
(define_expand "aarch64_<su>mull_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
}
)
;; As aarch64_<su>mull_hi_lane but the lane source is a 128-bit vector
;; (<VCONQ>), i.e. the *_laneq intrinsic forms.
(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the laneq insn above.
(define_expand "aarch64_<su>mull_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
}
)
;; Widening multiply of a D register by a broadcast scalar (element 0):
;; SMULL/UMULL with a [0] lane operand.
(define_insn "aarch64_<su>mull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:VD_HSI 1 "register_operand" "w"))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

;; High-half variant: SMULL2/UMULL2 by a broadcast scalar.
(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the _n insn above.
(define_expand "aarch64_<su>mull_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
						     operands[2], p));
    DONE;
  }
)
;; vmlal_lane_s16 intrinsics.

;; Widening multiply-accumulate by a selected lane: SMLAL/UMLAL with a lane
;; operand; accumulator operand 1 is tied to the destination.
(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:<VCOND> 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

;; High-half variant: SMLAL2/UMLAL2 by a selected lane of a 64-bit vector.
(define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the insn above.
(define_expand "aarch64_<su>mlal_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
;; As aarch64_<su>mlal_hi_lane but the lane source is a 128-bit vector
;; (<VCONQ>), i.e. the *_laneq intrinsic forms.
(define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the laneq insn above.
(define_expand "aarch64_<su>mlal_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
;; Widening multiply-subtract by a selected lane: SMLSL/UMLSL.  Operand 1
;; (the accumulator, tied to the destination) minus the widened
;; (vector x lane) product.
(define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:<VCOND> 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

;; High-half variant: SMLSL2/UMLSL2 by a selected lane of a 64-bit vector.
(define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the insn above.
(define_expand "aarch64_<su>mlsl_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
;; As aarch64_<su>mlsl_hi_lane but the lane source is a 128-bit vector
;; (<VCONQ>), i.e. the *_laneq intrinsic forms.
(define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

;; Expander supplying the hi-half lane PARALLEL for the laneq insn above.
(define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
}
)
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.

;; Floating-point operations can raise an exception.  Vectorizing such
;; operations are safe because of reasons explained below.

;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.
;; FP arithmetic operations.

;; Vector FP addition; the <vczle><vczbe> substs also generate the 64-bit
;; forms with implicit upper-half zeroing (see define_subst at top of file).
(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector FP subtraction.
(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		     (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector FP multiplication.
(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Vector FP division: first try the Newton-Raphson approximation sequence
;; (aarch64_emit_approx_div); otherwise fall through to the FDIV insn below.
(define_expand "div<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
	(div:VHSDF (match_operand:VHSDF 1 "register_operand")
		   (match_operand:VHSDF 2 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)
;; SVE has vector integer divisions, unlike Advanced SIMD.
;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
;; optabs to the midend.
(define_expand "<su_optab>div<mode>3"
  [(set (match_operand:VQDIV 0 "register_operand")
	(ANY_DIV:VQDIV
	  (match_operand:VQDIV 1 "register_operand")
	  (match_operand:VQDIV 2 "register_operand")))]
  "TARGET_SVE"
{
  ;; Reinterpret the fixed-width AdvSIMD operands as the corresponding
  ;; full SVE vectors and emit the SVE division on those subregs.
  machine_mode sve_mode
    = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
  rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
  rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
  rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);

  emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
  DONE;
})
;; Vector FP negation.
(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

;; Vector FP absolute value.
(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
;; Unfused FP multiply-add intrinsic: deliberately expand to a separate
;; FMUL + FADD pair (not FMLA) so the rounding matches the intrinsic's
;; two-operation semantics.
(define_expand "aarch64_float_mla<mode>"
  [(set (match_operand:VDQF_DF 0 "register_operand")
	(plus:VDQF_DF
	  (mult:VDQF_DF
	    (match_operand:VDQF_DF 2 "register_operand")
	    (match_operand:VDQF_DF 3 "register_operand"))
	  (match_operand:VDQF_DF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

;; Unfused FP multiply-subtract: FMUL + FSUB.
(define_expand "aarch64_float_mls<mode>"
  [(set (match_operand:VDQF_DF 0 "register_operand")
	(minus:VDQF_DF
	  (match_operand:VDQF_DF 1 "register_operand")
	  (mult:VDQF_DF
	    (match_operand:VDQF_DF 2 "register_operand")
	    (match_operand:VDQF_DF 3 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

;; Unfused multiply-add with a broadcast scalar multiplicand.
(define_expand "aarch64_float_mla_n<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (match_operand:<VEL> 3 "register_operand"))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

;; Unfused multiply-subtract with a broadcast scalar multiplicand.
(define_expand "aarch64_float_mls_n<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (match_operand:<VEL> 3 "register_operand"))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
;; Unfused multiply-add by a selected lane of a V2SF vector: FMUL (by lane)
;; followed by FADD.
(define_expand "aarch64_float_mla_lane<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V2SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
				    operands[3], operands[4]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

;; Unfused multiply-subtract by a selected lane of a V2SF vector.
(define_expand "aarch64_float_mls_lane<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V2SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
				    operands[3], operands[4]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

;; As mla_lane but the lane comes from a 128-bit V4SF vector (laneq form).
(define_expand "aarch64_float_mla_laneq<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V4SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
				     operands[3], operands[4]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

;; As mls_lane but the lane comes from a 128-bit V4SF vector (laneq form).
(define_expand "aarch64_float_mls_laneq<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V4SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
				     operands[3], operands[4]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
;; Fused multiply-add (standard fma optab): FMLA, accumulator operand 3
;; tied to the destination.
(define_insn "fma<mode>4<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")
		   (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLA with one multiplicand taken from a lane of a same-width vector;
;; the lane index is remapped for endianness at output time.
(define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(fma:VDQF
	  (vec_duplicate:VDQF
	    (vec_select:<VEL>
	      (match_operand:VDQF 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQF 3 "register_operand" "w")
	  (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA with the lane taken from the opposite-width vector mode
;; (<VSWAP_WIDTH>), covering the lane/laneq cross combinations.
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	  (vec_duplicate:VDQSF
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQSF 3 "register_operand" "w")
	  (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA with a scalar broadcast as one multiplicand: element [0] form.
(define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 2 "register_operand" "w")
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF destination computed with the indexed V2DF FMLA form.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (match_operand:DF 3 "register_operand" "w")
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
;; Fused negate-multiply-add (standard fnma optab): FMLS, with the first
;; multiplicand negated; accumulator operand 3 tied to the destination.
(define_insn "fnma<mode>4<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLS with one multiplicand taken from a lane of a same-width vector.
(define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(fma:VDQF
	  (neg:VDQF
	    (match_operand:VDQF 3 "register_operand" "w"))
	  (vec_duplicate:VDQF
	    (vec_select:<VEL>
	      (match_operand:VDQF 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with the lane taken from the opposite-width vector mode.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	  (neg:VDQSF
	    (match_operand:VDQSF 3 "register_operand" "w"))
	  (vec_duplicate:VDQSF
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with a scalar broadcast as one multiplicand: element [0] form.
(define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (neg:VMUL
	    (match_operand:VMUL 2 "register_operand" "w"))
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF destination computed with the indexed V2DF FMLS form.
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (neg:DF
	    (match_operand:DF 3 "register_operand" "w"))
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
				FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
3062 ;; HF Scalar variants of related SIMD instructions.
3063 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3064 [(set (match_operand:HI
0 "register_operand" "=w")
3065 (FIXUORS:HI (unspec:HF [(match_operand:HF
1 "register_operand" "w")]
3067 "TARGET_SIMD_F16INST"
3068 "fcvt<frint_suffix><su>
\t%h0, %h1"
3069 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar half-float to 16-bit integer conversion with truncation (round
;; toward zero): FCVTZS/FCVTZU on the h-register view, selected by the
;; signed/unsigned FIXUORS iterator.  Requires the +fp16 SIMD extensions.
;; NOTE(review): the closing paren of this define_insn (original line 3078)
;; is not visible in this extraction.
3072 (define_insn "<optab>_trunchfhi2"
3073 [(set (match_operand:HI
0 "register_operand" "=w")
3074 (FIXUORS:HI (match_operand:HF
1 "register_operand" "w")))]
3075 "TARGET_SIMD_F16INST"
3076 "fcvtz<su>
\t%h0, %h1"
3077 [(set_attr "type" "neon_fp_to_int_s")]
;; Inverse of the pattern above: convert a 16-bit integer to half-float,
;; SCVTF/UCVTF chosen by the signed/unsigned FLOATUORS iterator.
;; Also gated on the +fp16 SIMD extensions.
;; NOTE(review): the closing paren of this define_insn (original line 3086)
;; is not visible in this extraction.
3080 (define_insn "<optab>hihf2"
3081 [(set (match_operand:HF
0 "register_operand" "=w")
3082 (FLOATUORS:HF (match_operand:HI
1 "register_operand" "w")))]
3083 "TARGET_SIMD_F16INST"
3084 "<su_optab>cvtf
\t%h0, %h1"
3085 [(set_attr "type" "neon_int_to_fp_s")]
3088 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>
2_mult"
3089 [(set (match_operand:<FCVT_TARGET>
0 "register_operand" "=w")
3090 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3092 (match_operand:VDQF
1 "register_operand" "w")
3093 (match_operand:VDQF
2 "aarch64_fp_vec_pow2" ""))]
3096 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[
2]),
1,
3097 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3099 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[
2]);
3101 snprintf (buf,
64, "fcvtz<su>
\\t%%
0.<Vtype>, %%
1.<Vtype>, #%d", fbits);
3102 output_asm_insn (buf, operands);
3105 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3108 (define_expand "<optab><VHSDF:mode><fcvt_target>
2"
3109 [(set (match_operand:<FCVT_TARGET>
0 "register_operand")
3110 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3111 [(match_operand:VHSDF
1 "register_operand")]
3116 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>
2"
3117 [(set (match_operand:<FCVT_TARGET>
0 "register_operand")
3118 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3119 [(match_operand:VHSDF
1 "register_operand")]
3124 (define_expand "ftrunc<VHSDF:mode>
2"
3125 [(set (match_operand:VHSDF
0 "register_operand")
3126 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand")]
3131 (define_insn "<optab><fcvt_target><VHSDF:mode>
2"
3132 [(set (match_operand:VHSDF
0 "register_operand" "=w")
3134 (match_operand:<FCVT_TARGET>
1 "register_operand" "w")))]
3136 "<su_optab>cvtf
\\t%
0.<Vtype>, %
1.<Vtype>"
3137 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3140 ;; Conversions between vectors of floats and doubles.
3141 ;; Contains a mix of patterns to match standard pattern names
3142 ;; and those for intrinsics.
3144 ;; Float widening operations.
;; Float-widening unpack of the low half: select the low lanes of a
;; 128-bit half/single-float vector (vect_par_cnst_lo_half) and
;; float_extend them to the double-width mode — single FCVTL.
;; NOTE(review): the pattern's closing parens and the insn condition
;; (original lines 3151-3152) are missing from this extraction.
3146 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3147 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
3148 (float_extend:<VWIDE> (vec_select:<VHALF>
3149 (match_operand:VQ_HSF
1 "register_operand" "w")
3150 (match_operand:VQ_HSF
2 "vect_par_cnst_lo_half" "")
3153 "fcvtl
\\t%
0.<Vwtype>, %
1.<Vhalftype>"
3154 [(set_attr "type" "neon_fp_cvt_widen_s")]
3157 ;; Convert between fixed-point and floating-point (vector modes)
3159 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>
3"
3160 [(set (match_operand:<VHSDF:FCVT_TARGET>
0 "register_operand" "=w")
3161 (unspec:<VHSDF:FCVT_TARGET>
3162 [(match_operand:VHSDF
1 "register_operand" "w")
3163 (match_operand:SI
2 "immediate_operand" "i")]
3166 "<FCVT_F2FIXED:fcvt_fixed_insn>
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, #%
2"
3167 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
3170 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>
3"
3171 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET>
0 "register_operand" "=w")
3172 (unspec:<VDQ_HSDI:FCVT_TARGET>
3173 [(match_operand:VDQ_HSDI
1 "register_operand" "w")
3174 (match_operand:SI
2 "immediate_operand" "i")]
3177 "<FCVT_FIXED2F:fcvt_fixed_insn>
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, #%
2"
3178 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3181 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3182 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3183 ;; the meaning of HI and LO changes depending on the target endianness.
3184 ;; While elsewhere we map the higher numbered elements of a vector to
3185 ;; the lower architectural lanes of the vector, for these patterns we want
3186 ;; to always treat "hi" as referring to the higher architectural lanes.
3187 ;; Consequently, while the patterns below look inconsistent with our
3188 ;; other big-endian patterns their behavior is as required.
3190 (define_expand "vec_unpacks_lo_<mode>"
3191 [(match_operand:<VWIDE>
0 "register_operand")
3192 (match_operand:VQ_HSF
1 "register_operand")]
3195 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3196 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[
0],
3202 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3203 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
3204 (float_extend:<VWIDE> (vec_select:<VHALF>
3205 (match_operand:VQ_HSF
1 "register_operand" "w")
3206 (match_operand:VQ_HSF
2 "vect_par_cnst_hi_half" "")
3209 "fcvtl2
\\t%
0.<Vwtype>, %
1.<Vtype>"
3210 [(set_attr "type" "neon_fp_cvt_widen_s")]
3213 (define_expand "vec_unpacks_hi_<mode>"
3214 [(match_operand:<VWIDE>
0 "register_operand")
3215 (match_operand:VQ_HSF
1 "register_operand")]
3218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3219 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[
0],
;; Widen a whole 64-bit float vector (VDF) to the double-width mode with
;; FCVTL; unlike the unpacks_lo pattern above there is no lane selection —
;; every element of the narrow vector is extended.
;; NOTE(review): the insn condition line (original 3228) is missing from
;; this extraction.
3224 (define_insn "aarch64_float_extend_lo_<Vwide>"
3225 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
3226 (float_extend:<VWIDE>
3227 (match_operand:VDF
1 "register_operand" "w")))]
3229 "fcvtl
\\t%
0<Vmwtype>, %
1<Vmtype>"
3230 [(set_attr "type" "neon_fp_cvt_widen_s")]
3233 ;; Float narrowing operations.
3235 (define_insn "aarch64_float_trunc_rodd_df"
3236 [(set (match_operand:SF
0 "register_operand" "=w")
3237 (unspec:SF [(match_operand:DF
1 "register_operand" "w")]
3241 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3244 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3245 [(set (match_operand:V2SF
0 "register_operand" "=w")
3246 (unspec:V2SF [(match_operand:V2DF
1 "register_operand" "w")]
3249 "fcvtxn
\\t%
0.2s, %
1.2d"
3250 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3253 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3254 [(set (match_operand:V4SF
0 "register_operand" "=w")
3256 (match_operand:V2SF
1 "register_operand" "
0")
3257 (unspec:V2SF [(match_operand:V2DF
2 "register_operand" "w")]
3259 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3260 "fcvtxn2
\\t%
0.4s, %
2.2d"
3261 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3264 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3265 [(set (match_operand:V4SF
0 "register_operand" "=w")
3267 (unspec:V2SF [(match_operand:V2DF
2 "register_operand" "w")]
3269 (match_operand:V2SF
1 "register_operand" "
0")))]
3270 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3271 "fcvtxn2
\\t%
0.4s, %
2.2d"
3272 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3275 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3276 [(match_operand:V4SF
0 "register_operand")
3277 (match_operand:V2SF
1 "register_operand")
3278 (match_operand:V2DF
2 "register_operand")]
3281 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3282 ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3283 : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3284 emit_insn (gen (operands[
0], operands[
1], operands[
2]));
3289 (define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
3290 [(set (match_operand:VDF
0 "register_operand" "=w")
3292 (match_operand:<VWIDE>
1 "register_operand" "w")))]
3294 "fcvtn
\\t%
0.<Vtype>, %
1<Vmwtype>"
3295 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3298 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3299 [(set (match_operand:<VDBL>
0 "register_operand" "=w")
3301 (match_operand:VDF
1 "register_operand" "
0")
3303 (match_operand:<VWIDE>
2 "register_operand" "w"))))]
3304 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3305 "fcvtn2
\\t%
0.<Vdtype>, %
2<Vmwtype>"
3306 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3309 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3310 [(set (match_operand:<VDBL>
0 "register_operand" "=w")
3313 (match_operand:<VWIDE>
2 "register_operand" "w"))
3314 (match_operand:VDF
1 "register_operand" "
0")))]
3315 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3316 "fcvtn2
\\t%
0.<Vdtype>, %
2<Vmwtype>"
3317 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3320 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3321 [(match_operand:<VDBL>
0 "register_operand")
3322 (match_operand:VDF
1 "register_operand")
3323 (match_operand:<VWIDE>
2 "register_operand")]
3326 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3327 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3328 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3329 emit_insn (gen (operands[
0], operands[
1], operands[
2]));
3334 (define_expand "vec_pack_trunc_v2df"
3335 [(set (match_operand:V4SF
0 "register_operand")
3337 (float_truncate:V2SF
3338 (match_operand:V2DF
1 "register_operand"))
3339 (float_truncate:V2SF
3340 (match_operand:V2DF
2 "register_operand"))
3344 rtx tmp = gen_reg_rtx (V2SFmode);
3345 int lo = BYTES_BIG_ENDIAN ?
2 :
1;
3346 int hi = BYTES_BIG_ENDIAN ?
1 :
2;
3348 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
3349 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[
0],
3350 tmp, operands[hi]));
3355 (define_expand "vec_pack_trunc_df"
3356 [(set (match_operand:V2SF
0 "register_operand")
3358 (float_truncate:SF (match_operand:DF
1 "general_operand"))
3359 (float_truncate:SF (match_operand:DF
2 "general_operand"))))]
3362 rtx tmp = gen_reg_rtx (V2SFmode);
3363 emit_insn (gen_aarch64_vec_concatdf (tmp, operands[
1], operands[
2]));
3364 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[
0], tmp));
3370 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
3372 ;; a = (b < c) ? b : c;
3373 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3374 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3377 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3378 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3379 ;; operand will be returned when both operands are zero (i.e. they may not
3380 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
3381 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3384 (define_insn "<su><maxmin><mode>
3"
3385 [(set (match_operand:VHSDF
0 "register_operand" "=w")
3386 (FMAXMIN:VHSDF (match_operand:VHSDF
1 "register_operand" "w")
3387 (match_operand:VHSDF
2 "register_operand" "w")))]
3389 "f<maxmin>nm
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
3390 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3393 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3394 ;; fmaxnm and fminnm are used for the fmax<mode>
3 standard pattern names,
3395 ;; which implement the IEEE fmax ()/fmin () functions.
3396 (define_insn "<fmaxmin><mode>
3<vczle><vczbe>"
3397 [(set (match_operand:VHSDF
0 "register_operand" "=w")
3398 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand" "w")
3399 (match_operand:VHSDF
2 "register_operand" "w")]
3402 "<maxmin_uns_op>
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
3403 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3406 ;; 'across lanes' add.
3408 (define_insn "aarch64_faddp<mode><vczle><vczbe>"
3409 [(set (match_operand:VHSDF
0 "register_operand" "=w")
3410 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand" "w")
3411 (match_operand:VHSDF
2 "register_operand" "w")]
3414 "faddp
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
3415 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3418 (define_insn "reduc_plus_scal_<mode>"
3419 [(set (match_operand:<VEL>
0 "register_operand" "=w")
3420 (unspec:<VEL> [(match_operand:VDQV
1 "register_operand" "w")]
3423 "add<VDQV:vp>
\\t%<Vetype>
0, %
1.<Vtype>"
3424 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI add reduction: there is no two-element ADDV, so use ADDP with the
;; source repeated for both operands; the scalar sum ends up in lane 0 of
;; the destination.
;; NOTE(review): the unspec code and insn condition lines (original
;; 3430-3431) are missing from this extraction.
3427 (define_insn "reduc_plus_scal_v2si"
3428 [(set (match_operand:SI
0 "register_operand" "=w")
3429 (unspec:SI [(match_operand:V2SI
1 "register_operand" "w")]
3432 "addp
\\t%
0.2s, %
1.2s, %
1.2s"
3433 [(set_attr "type" "neon_reduc_add")]
3436 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3437 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3438 [(set (match_operand:GPI
0 "register_operand" "=w")
3440 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E
1 "register_operand" "w")]
3443 "add<VDQV_E:vp>
\\t%<VDQV_E:Vetype>
0, %
1.<VDQV_E:Vtype>"
3444 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3447 (define_insn "reduc_plus_scal_<mode>"
3448 [(set (match_operand:<VEL>
0 "register_operand" "=w")
3449 (unspec:<VEL> [(match_operand:V2F
1 "register_operand" "w")]
3452 "faddp
\\t%<Vetype>
0, %
1.<Vtype>"
3453 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3456 (define_expand "reduc_plus_scal_v4sf"
3457 [(set (match_operand:SF
0 "register_operand")
3458 (unspec:SF [(match_operand:V4SF
1 "register_operand")]
3462 rtx elt = aarch64_endian_lane_rtx (V4SFmode,
0);
3463 rtx scratch = gen_reg_rtx (V4SFmode);
3464 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[
1], operands[
1]));
3465 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3466 emit_insn (gen_aarch64_get_lanev4sf (operands[
0], scratch, elt));
3470 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
3471 ;; sign or zero-extends its elements.
3472 (define_insn "aarch64_<su>addlv<mode>"
3473 [(set (match_operand:<VWIDE_S>
0 "register_operand" "=w")
3475 [(ANY_EXTEND:<V2XWIDE>
3476 (match_operand:VDQV_L
1 "register_operand" "w"))]
3479 "<su>addl<vp>
\\t%<Vwstype>
0<Vwsuf>, %
1.<Vtype>"
3480 [(set_attr "type" "neon_reduc_add<q>")]
3483 ;; An ADDV over a vector PLUS of elements extracted and widened all from the
3484 ;; same vector is the same as an [SU]ADDLV above, so long as all the elements
3485 ;; of that vector are used. We can greatly simplify the RTL expression using
3487 (define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
3488 [(set (match_operand:<VWIDE_S>
0 "register_operand")
3492 (ANY_EXTEND:<V2XWIDE>
3493 (match_operand:VDQV_L
1 "register_operand"))
3494 (match_operand:<V2XWIDE>
2 "vect_par_cnst_select_half"))
3495 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup
1))
3496 (match_operand:<V2XWIDE>
3 "vect_par_cnst_select_half")))]
3498 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[
2], operands[
3])"
3503 [(ANY_EXTEND:<V2XWIDE>
3509 ;; Similar to the above but for two-step zero-widening reductions.
3510 ;; We can push the outer zero_extend outside the ADDV unspec and make
3511 ;; use of the implicit high-part zeroing semantics of UADDLV to do it all
3512 ;; in a single instruction.
3513 (define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
3514 [(set (match_operand:<VWIDE2X_S>
0 "register_operand" "=w")
3516 [(zero_extend:<VQUADW>
3519 (zero_extend:<V2XWIDE>
3520 (match_operand:VDQQH
1 "register_operand" "w"))
3521 (match_operand:<V2XWIDE>
2 "vect_par_cnst_select_half"))
3522 (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup
1))
3523 (match_operand:<V2XWIDE>
3 "vect_par_cnst_select_half"))))]
3525 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[
2], operands[
3])"
3529 (zero_extend:<VWIDE2X_S>
3531 [(zero_extend:<V2XWIDE>
3537 ;; Zero-extending version of the above. As these intrinsics produce a scalar
3538 ;; value that may be used by further intrinsics we want to avoid moving the
3539 ;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.
3541 (define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
3542 [(set (match_operand:GPI
0 "register_operand" "=w")
3545 [(ANY_EXTEND:<VDQV_L:V2XWIDE>
3546 (match_operand:VDQV_L
1 "register_operand" "w"))]
3549 && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
3550 "<su>addl<VDQV_L:vp>
\\t%<VDQV_L:Vwstype>
0<VDQV_L:Vwsuf>, %
1.<VDQV_L:Vtype>"
3551 [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
3554 (define_expand "aarch64_<su>addlp<mode>"
3555 [(set (match_operand:<VDBLW>
0 "register_operand")
3558 (ANY_EXTEND:<V2XWIDE>
3559 (match_operand:VDQV_L
1 "register_operand"))
3561 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup
1))
3565 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () /
2;
3566 operands[
2] = aarch64_gen_stepped_int_parallel (nunits,
0,
2);
3567 operands[
3] = aarch64_gen_stepped_int_parallel (nunits,
1,
2);
3571 (define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
3572 [(set (match_operand:<VDBLW>
0 "register_operand" "=w")
3575 (ANY_EXTEND:<V2XWIDE>
3576 (match_operand:VDQV_L
1 "register_operand" "w"))
3577 (match_operand:<V2XWIDE>
2 "vect_par_cnst_even_or_odd_half"))
3578 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup
1))
3579 (match_operand:<V2XWIDE>
3 "vect_par_cnst_even_or_odd_half"))))]
3581 && !rtx_equal_p (operands[
2], operands[
3])"
3582 "<su>addlp
\\t%
0.<Vwhalf>, %
1.<Vtype>"
3583 [(set_attr "type" "neon_reduc_add<q>")]
3586 (define_insn "clrsb<mode>
2<vczle><vczbe>"
3587 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
3588 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI
1 "register_operand" "w")))]
3590 "cls
\\t%
0.<Vtype>, %
1.<Vtype>"
3591 [(set_attr "type" "neon_cls<q>")]
3594 (define_insn "clz<mode>
2<vczle><vczbe>"
3595 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
3596 (clz:VDQ_BHSI (match_operand:VDQ_BHSI
1 "register_operand" "w")))]
3598 "clz
\\t%
0.<Vtype>, %
1.<Vtype>"
3599 [(set_attr "type" "neon_cls<q>")]
3602 (define_insn "popcount<mode>
2<vczle><vczbe>"
3603 [(set (match_operand:VB
0 "register_operand" "=w")
3604 (popcount:VB (match_operand:VB
1 "register_operand" "w")))]
3606 "cnt
\\t%
0.<Vbtype>, %
1.<Vbtype>"
3607 [(set_attr "type" "neon_cnt<q>")]
3610 ;; 'across lanes' max and min ops.
3612 ;; Template for outputting a scalar, so we can create __builtins which can be
3613 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
3614 (define_expand "reduc_<optab>_scal_<mode>"
3615 [(match_operand:<VEL>
0 "register_operand")
3616 (unspec:<VEL> [(match_operand:VHSDF
1 "register_operand")]
3620 rtx elt = aarch64_endian_lane_rtx (<MODE>mode,
0);
3621 rtx scratch = gen_reg_rtx (<MODE>mode);
3622 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3624 emit_insn (gen_aarch64_get_lane<mode> (operands[
0], scratch, elt));
3629 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3630 [(match_operand:<VEL>
0 "register_operand")
3631 (unspec:<VEL> [(match_operand:VHSDF
1 "register_operand")]
3635 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[
0], operands[
1]));
3640 ;; Likewise for integer cases, signed and unsigned.
3641 (define_expand "reduc_<optab>_scal_<mode>"
3642 [(match_operand:<VEL>
0 "register_operand")
3643 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI
1 "register_operand")]
3647 rtx elt = aarch64_endian_lane_rtx (<MODE>mode,
0);
3648 rtx scratch = gen_reg_rtx (<MODE>mode);
3649 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3651 emit_insn (gen_aarch64_get_lane<mode> (operands[
0], scratch, elt));
3656 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3657 [(set (match_operand:VDQV_S
0 "register_operand" "=w")
3658 (unspec:VDQV_S [(match_operand:VDQV_S
1 "register_operand" "w")]
3661 "<maxmin_uns_op>v
\\t%<Vetype>
0, %
1.<Vtype>"
3662 [(set_attr "type" "neon_reduc_minmax<q>")]
3665 (define_insn "aarch64_reduc_<optab>_internalv2si"
3666 [(set (match_operand:V2SI
0 "register_operand" "=w")
3667 (unspec:V2SI [(match_operand:V2SI
1 "register_operand" "w")]
3670 "<maxmin_uns_op>p
\\t%
0.2s, %
1.2s, %
1.2s"
3671 [(set_attr "type" "neon_reduc_minmax")]
3674 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3675 [(set (match_operand:VHSDF
0 "register_operand" "=w")
3676 (unspec:VHSDF [(match_operand:VHSDF
1 "register_operand" "w")]
3679 "<maxmin_uns_op><vp>
\\t%<Vetype>
0, %
1.<Vtype>"
3680 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3683 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3685 ;; Operand
1 is the mask, operands
2 and
3 are the bitfields from which
3688 ;; Thus our BSL is of the form:
3689 ;; op0 = bsl (mask, op2, op3)
3690 ;; We can use any of:
3693 ;; bsl mask, op1, op2
3694 ;; if (op0 = op1) (so
1-bits in mask choose bits from op2, else op0)
3695 ;; bit op0, op2, mask
3696 ;; if (op0 = op2) (so
0-bits in mask choose bits from op1, else op0)
3697 ;; bif op0, op1, mask
3699 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3700 ;; Some forms of straight-line code may generate the equivalent form
3701 ;; in *aarch64_simd_bsl<mode>_alt.
3703 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3704 [(set (match_operand:VDQ_I
0 "register_operand" "=w,w,w")
3708 (match_operand:<V_INT_EQUIV>
3 "register_operand" "w,
0,w")
3709 (match_operand:VDQ_I
2 "register_operand" "w,w,
0"))
3710 (match_operand:VDQ_I
1 "register_operand" "
0,w,w"))
3711 (match_dup:<V_INT_EQUIV>
3)
3715 bsl
\\t%
0.<Vbtype>, %
2.<Vbtype>, %
3.<Vbtype>
3716 bit
\\t%
0.<Vbtype>, %
2.<Vbtype>, %
1.<Vbtype>
3717 bif
\\t%
0.<Vbtype>, %
3.<Vbtype>, %
1.<Vbtype>"
3718 [(set_attr "type" "neon_bsl<q>")]
3721 ;; We need this form in addition to the above pattern to match the case
3722 ;; when combine tries merging three insns such that the second operand of
3723 ;; the outer XOR matches the second operand of the inner XOR rather than
3724 ;; the first. The two are equivalent but since recog doesn't try all
3725 ;; permutations of commutative operations, we have to have a separate pattern.
3727 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3728 [(set (match_operand:VDQ_I
0 "register_operand" "=w,w,w")
3732 (match_operand:VDQ_I
3 "register_operand" "w,w,
0")
3733 (match_operand:<V_INT_EQUIV>
2 "register_operand" "w,
0,w"))
3734 (match_operand:VDQ_I
1 "register_operand" "
0,w,w"))
3735 (match_dup:<V_INT_EQUIV>
2)))]
3738 bsl
\\t%
0.<Vbtype>, %
3.<Vbtype>, %
2.<Vbtype>
3739 bit
\\t%
0.<Vbtype>, %
3.<Vbtype>, %
1.<Vbtype>
3740 bif
\\t%
0.<Vbtype>, %
2.<Vbtype>, %
1.<Vbtype>"
3741 [(set_attr "type" "neon_bsl<q>")]
3744 ;; DImode is special, we want to avoid computing operations which are
3745 ;; more naturally computed in general purpose registers in the vector
3746 ;; registers. If we do that, we need to move all three operands from general
3747 ;; purpose registers to vector registers, then back again. However, we
3748 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3749 ;; optimizations based on the component operations of a BSL.
3751 ;; That means we need a splitter back to the individual operations, if they
3752 ;; would be better calculated on the integer side.
3754 (define_insn_and_split "aarch64_simd_bsldi_internal"
3755 [(set (match_operand:DI
0 "register_operand" "=w,w,w,&r")
3759 (match_operand:DI
3 "register_operand" "w,
0,w,r")
3760 (match_operand:DI
2 "register_operand" "w,w,
0,r"))
3761 (match_operand:DI
1 "register_operand" "
0,w,w,r"))
3766 bsl
\\t%
0.8b, %
2.8b, %
3.8b
3767 bit
\\t%
0.8b, %
2.8b, %
1.8b
3768 bif
\\t%
0.8b, %
3.8b, %
1.8b
3770 "&& REG_P (operands[
0]) && GP_REGNUM_P (REGNO (operands[
0]))"
3771 [(match_dup
1) (match_dup
1) (match_dup
2) (match_dup
3)]
3773 /* Split back to individual operations. If we're before reload, and
3774 able to create a temporary register, do so. If we're after reload,
3775 we've got an early-clobber destination register, so use that.
3776 Otherwise, we can't create pseudos and we can't yet guarantee that
3777 operands[
0] is safe to write, so FAIL to split. */
3780 if (reload_completed)
3781 scratch = operands[
0];
3782 else if (can_create_pseudo_p ())
3783 scratch = gen_reg_rtx (DImode);
3787 emit_insn (gen_xordi3 (scratch, operands[
2], operands[
3]));
3788 emit_insn (gen_anddi3 (scratch, scratch, operands[
1]));
3789 emit_insn (gen_xordi3 (operands[
0], scratch, operands[
3]));
3792 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3793 (set_attr "length" "
4,
4,
4,
12")]
3796 (define_insn_and_split "aarch64_simd_bsldi_alt"
3797 [(set (match_operand:DI
0 "register_operand" "=w,w,w,&r")
3801 (match_operand:DI
3 "register_operand" "w,w,
0,r")
3802 (match_operand:DI
2 "register_operand" "w,
0,w,r"))
3803 (match_operand:DI
1 "register_operand" "
0,w,w,r"))
3808 bsl
\\t%
0.8b, %
3.8b, %
2.8b
3809 bit
\\t%
0.8b, %
3.8b, %
1.8b
3810 bif
\\t%
0.8b, %
2.8b, %
1.8b
3812 "&& REG_P (operands[
0]) && GP_REGNUM_P (REGNO (operands[
0]))"
3813 [(match_dup
0) (match_dup
1) (match_dup
2) (match_dup
3)]
3815 /* Split back to individual operations. If we're before reload, and
3816 able to create a temporary register, do so. If we're after reload,
3817 we've got an early-clobber destination register, so use that.
3818 Otherwise, we can't create pseudos and we can't yet guarantee that
3819 operands[
0] is safe to write, so FAIL to split. */
3822 if (reload_completed)
3823 scratch = operands[
0];
3824 else if (can_create_pseudo_p ())
3825 scratch = gen_reg_rtx (DImode);
3829 emit_insn (gen_xordi3 (scratch, operands[
2], operands[
3]));
3830 emit_insn (gen_anddi3 (scratch, scratch, operands[
1]));
3831 emit_insn (gen_xordi3 (operands[
0], scratch, operands[
2]));
3834 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3835 (set_attr "length" "
4,
4,
4,
12")]
3838 (define_expand "aarch64_simd_bsl<mode>"
3839 [(match_operand:VALLDIF
0 "register_operand")
3840 (match_operand:<V_INT_EQUIV>
1 "register_operand")
3841 (match_operand:VALLDIF
2 "register_operand")
3842 (match_operand:VALLDIF
3 "register_operand")]
3845 /* We can't alias operands together if they have different modes. */
3846 rtx tmp = operands[
0];
3847 if (FLOAT_MODE_P (<MODE>mode))
3849 operands[
2] = gen_lowpart (<V_INT_EQUIV>mode, operands[
2]);
3850 operands[
3] = gen_lowpart (<V_INT_EQUIV>mode, operands[
3]);
3851 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3853 operands[
1] = gen_lowpart (<V_INT_EQUIV>mode, operands[
1]);
3854 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3858 if (tmp != operands[
0])
3859 emit_move_insn (operands[
0], gen_lowpart (<MODE>mode, tmp));
3864 (define_expand "vcond_mask_<mode><v_int_equiv>"
3865 [(match_operand:VALLDI
0 "register_operand")
3866 (match_operand:VALLDI
1 "nonmemory_operand")
3867 (match_operand:VALLDI
2 "nonmemory_operand")
3868 (match_operand:<V_INT_EQUIV>
3 "register_operand")]
3871 /* If we have (a = (P) ? -
1 :
0);
3872 Then we can simply move the generated mask (result must be int). */
3873 if (operands[
1] == CONSTM1_RTX (<MODE>mode)
3874 && operands[
2] == CONST0_RTX (<MODE>mode))
3875 emit_move_insn (operands[
0], operands[
3]);
3876 /* Similarly, (a = (P) ?
0 : -
1) is just inverting the generated mask. */
3877 else if (operands[
1] == CONST0_RTX (<MODE>mode)
3878 && operands[
2] == CONSTM1_RTX (<MODE>mode))
3879 emit_insn (gen_one_cmpl<v_int_equiv>
2 (operands[
0], operands[
3]));
3882 if (!REG_P (operands[
1]))
3883 operands[
1] = force_reg (<MODE>mode, operands[
1]);
3884 if (!REG_P (operands[
2]))
3885 operands[
2] = force_reg (<MODE>mode, operands[
2]);
3886 emit_insn (gen_aarch64_simd_bsl<mode> (operands[
0], operands[
3],
3887 operands[
1], operands[
2]));
3893 ;; Patterns comparing two vectors to produce a mask.
3895 (define_expand "vec_cmp<mode><mode>"
3896 [(set (match_operand:VSDQ_I_DI
0 "register_operand")
3897 (match_operator
1 "comparison_operator"
3898 [(match_operand:VSDQ_I_DI
2 "register_operand")
3899 (match_operand:VSDQ_I_DI
3 "nonmemory_operand")]))]
3902 rtx mask = operands[
0];
3903 enum rtx_code code = GET_CODE (operands[
1]);
3913 if (operands[
3] == CONST0_RTX (<MODE>mode))
3918 if (!REG_P (operands[
3]))
3919 operands[
3] = force_reg (<MODE>mode, operands[
3]);
3927 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[
2], operands[
3]));
3931 emit_insn (gen_aarch64_cmge<mode> (mask, operands[
2], operands[
3]));
3935 emit_insn (gen_aarch64_cmle<mode> (mask, operands[
2], operands[
3]));
3939 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[
2], operands[
3]));
3943 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[
3], operands[
2]));
3947 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[
2], operands[
3]));
3951 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[
3], operands[
2]));
3955 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[
2], operands[
3]));
3959 /* Handle NE as !EQ. */
3960 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[
2], operands[
3]));
3961 emit_insn (gen_one_cmpl<v_int_equiv>
2 (mask, mask));
3965 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[
2], operands[
3]));
3975 (define_expand "vec_cmp<mode><v_int_equiv>"
3976 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand")
3977 (match_operator
1 "comparison_operator"
3978 [(match_operand:VDQF
2 "register_operand")
3979 (match_operand:VDQF
3 "nonmemory_operand")]))]
3982 int use_zero_form =
0;
3983 enum rtx_code code = GET_CODE (operands[
1]);
3984 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3986 rtx (*comparison) (rtx, rtx, rtx) = NULL;
3995 if (operands[
3] == CONST0_RTX (<MODE>mode))
4002 if (!REG_P (operands[
3]))
4003 operands[
3] = force_reg (<MODE>mode, operands[
3]);
4013 comparison = gen_aarch64_cmlt<mode>;
4018 std::swap (operands[
2], operands[
3]);
4022 comparison = gen_aarch64_cmgt<mode>;
4027 comparison = gen_aarch64_cmle<mode>;
4032 std::swap (operands[
2], operands[
3]);
4036 comparison = gen_aarch64_cmge<mode>;
4040 comparison = gen_aarch64_cmeq<mode>;
4058 /* All of the above must not raise any FP exceptions. Thus we first
4059 check each operand for NaNs and force any elements containing NaN to
4060 zero before using them in the compare.
4061 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4062 (cm<cc> (isnan (a) ?
0.0 : a,
4063 isnan (b) ?
0.0 : b))
4064 We use the following transformations for doing the comparisons:
4068 a UNLT b -> b GT a. */
4070 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4071 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4072 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4073 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[
2], operands[
2]));
4074 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[
3], operands[
3]));
4075 emit_insn (gen_and<v_int_equiv>
3 (tmp2, tmp0, tmp1));
4076 emit_insn (gen_and<v_int_equiv>
3 (tmp0, tmp0,
4077 lowpart_subreg (<V_INT_EQUIV>mode,
4080 emit_insn (gen_and<v_int_equiv>
3 (tmp1, tmp1,
4081 lowpart_subreg (<V_INT_EQUIV>mode,
4084 gcc_assert (comparison != NULL);
4085 emit_insn (comparison (operands[
0],
4086 lowpart_subreg (<MODE>mode,
4087 tmp0, <V_INT_EQUIV>mode),
4088 lowpart_subreg (<MODE>mode,
4089 tmp1, <V_INT_EQUIV>mode)));
4090 emit_insn (gen_orn<v_int_equiv>
3 (operands[
0], tmp2, operands[
0]));
4100 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4101 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
4107 a NE b -> ~(a EQ b) */
4108 gcc_assert (comparison != NULL);
4109 emit_insn (comparison (operands[
0], operands[
2], operands[
3]));
4111 emit_insn (gen_one_cmpl<v_int_equiv>
2 (operands[
0], operands[
0]));
4115 /* LTGT is not guaranteed not to generate an FP exception.  So let's
4116 go the faster way : ((a > b) || (b > a)). */
4117 emit_insn (gen_aarch64_cmgt<mode> (operands[
0],
4118 operands[
2], operands[
3]));
4119 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[
3], operands[
2]));
4120 emit_insn (gen_ior<v_int_equiv>
3 (operands[
0], operands[
0], tmp));
4126 /* cmeq (a, a) & cmeq (b, b). */
4127 emit_insn (gen_aarch64_cmeq<mode> (operands[
0],
4128 operands[
2], operands[
2]));
4129 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[
3], operands[
3]));
4130 emit_insn (gen_and<v_int_equiv>
3 (operands[
0], operands[
0], tmp));
4132 if (code == UNORDERED)
4133 emit_insn (gen_one_cmpl<v_int_equiv>
2 (operands[
0], operands[
0]));
4134 else if (code == UNEQ)
4136 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[
2], operands[
3]));
4137 emit_insn (gen_orn<v_int_equiv>
3 (operands[
0], operands[
0], tmp));
4148 (define_expand "vec_cmpu<mode><mode>"
4149 [(set (match_operand:VSDQ_I_DI
0 "register_operand")
4150 (match_operator
1 "comparison_operator"
4151 [(match_operand:VSDQ_I_DI
2 "register_operand")
4152 (match_operand:VSDQ_I_DI
3 "nonmemory_operand")]))]
4155 emit_insn (gen_vec_cmp<mode><mode> (operands[
0], operands[
1],
4156 operands[
2], operands[
3]));
4160 (define_expand "vcond<mode><mode>"
4161 [(set (match_operand:VALLDI
0 "register_operand")
4162 (if_then_else:VALLDI
4163 (match_operator
3 "comparison_operator"
4164 [(match_operand:VALLDI
4 "register_operand")
4165 (match_operand:VALLDI
5 "nonmemory_operand")])
4166 (match_operand:VALLDI
1 "nonmemory_operand")
4167 (match_operand:VALLDI
2 "nonmemory_operand")))]
4170 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4171 enum rtx_code code = GET_CODE (operands[
3]);
4173 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4174 it as well as switch operands
1/
2 in order to avoid the additional
4178 operands[
3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[
3]),
4179 operands[
4], operands[
5]);
4180 std::swap (operands[
1], operands[
2]);
4182 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[
3],
4183 operands[
4], operands[
5]));
4184 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[
0], operands[
1],
4185 operands[
2], mask));
4190 (define_expand "vcond<v_cmp_mixed><mode>"
4191 [(set (match_operand:<V_cmp_mixed>
0 "register_operand")
4192 (if_then_else:<V_cmp_mixed>
4193 (match_operator
3 "comparison_operator"
4194 [(match_operand:VDQF_COND
4 "register_operand")
4195 (match_operand:VDQF_COND
5 "nonmemory_operand")])
4196 (match_operand:<V_cmp_mixed>
1 "nonmemory_operand")
4197 (match_operand:<V_cmp_mixed>
2 "nonmemory_operand")))]
4200 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4201 enum rtx_code code = GET_CODE (operands[
3]);
4203 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4204 it as well as switch operands
1/
2 in order to avoid the additional
4208 operands[
3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[
3]),
4209 operands[
4], operands[
5]);
4210 std::swap (operands[
1], operands[
2]);
4212 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[
3],
4213 operands[
4], operands[
5]));
4214 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4215 operands[
0], operands[
1],
4216 operands[
2], mask));
4221 (define_expand "vcondu<mode><mode>"
4222 [(set (match_operand:VSDQ_I_DI
0 "register_operand")
4223 (if_then_else:VSDQ_I_DI
4224 (match_operator
3 "comparison_operator"
4225 [(match_operand:VSDQ_I_DI
4 "register_operand")
4226 (match_operand:VSDQ_I_DI
5 "nonmemory_operand")])
4227 (match_operand:VSDQ_I_DI
1 "nonmemory_operand")
4228 (match_operand:VSDQ_I_DI
2 "nonmemory_operand")))]
4231 rtx mask = gen_reg_rtx (<MODE>mode);
4232 enum rtx_code code = GET_CODE (operands[
3]);
4234 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4235 it as well as switch operands
1/
2 in order to avoid the additional
4239 operands[
3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[
3]),
4240 operands[
4], operands[
5]);
4241 std::swap (operands[
1], operands[
2]);
4243 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[
3],
4244 operands[
4], operands[
5]));
4245 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[
0], operands[
1],
4246 operands[
2], mask));
4250 (define_expand "vcondu<mode><v_cmp_mixed>"
4251 [(set (match_operand:VDQF
0 "register_operand")
4253 (match_operator
3 "comparison_operator"
4254 [(match_operand:<V_cmp_mixed>
4 "register_operand")
4255 (match_operand:<V_cmp_mixed>
5 "nonmemory_operand")])
4256 (match_operand:VDQF
1 "nonmemory_operand")
4257 (match_operand:VDQF
2 "nonmemory_operand")))]
4260 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4261 enum rtx_code code = GET_CODE (operands[
3]);
4263 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4264 it as well as switch operands
1/
2 in order to avoid the additional
4268 operands[
3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[
3]),
4269 operands[
4], operands[
5]);
4270 std::swap (operands[
1], operands[
2]);
4272 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4274 operands[
4], operands[
5]));
4275 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[
0], operands[
1],
4276 operands[
2], mask));
4280 ;; Patterns for AArch64 SIMD Intrinsics.
4282 ;; Lane extraction with sign extension to general purpose register.
4283 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4284 [(set (match_operand:GPI
0 "register_operand" "=r")
4286 (vec_select:<VDQQH:VEL>
4287 (match_operand:VDQQH
1 "register_operand" "w")
4288 (parallel [(match_operand:SI
2 "immediate_operand" "i")]))))]
4291 operands[
2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4292 INTVAL (operands[
2]));
4293 return "smov
\\t%<GPI:w>
0, %
1.<VDQQH:Vetype>[%
2]";
4295 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4298 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4299 [(set (match_operand:GPI
0 "register_operand" "=r")
4301 (vec_select:<VDQQH:VEL>
4302 (match_operand:VDQQH
1 "register_operand" "w")
4303 (parallel [(match_operand:SI
2 "immediate_operand" "i")]))))]
4306 operands[
2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4307 INTVAL (operands[
2]));
4308 return "umov
\\t%w0, %
1.<VDQQH:Vetype>[%
2]";
4310 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4313 ;; Lane extraction of a value, neither sign nor zero extension
4314 ;; is guaranteed so upper bits should be considered undefined.
4315 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4316 ;; Extracting lane zero is split into a simple move when it is between SIMD
4317 ;; registers or a store.
4318 (define_insn_and_split "aarch64_get_lane<mode>"
4319 [(set (match_operand:<VEL>
0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4321 (match_operand:VALL_F16
1 "register_operand" "w, w, w")
4322 (parallel [(match_operand:SI
2 "immediate_operand" "i, i, i")])))]
4325 operands[
2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
2]));
4326 switch (which_alternative)
4329 return "umov
\\t%<vwcore>
0, %
1.<Vetype>[%
2]";
4331 return "dup
\\t%<Vetype>
0, %
1.<Vetype>[%
2]";
4333 return "st1
\\t{%
1.<Vetype>}[%
2], %
0";
4338 "&& reload_completed
4339 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[
2])) ==
0"
4340 [(set (match_dup
0) (match_dup
1))]
4342 operands[
1] = aarch64_replace_reg_mode (operands[
1], <VEL>mode);
4344 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4347 (define_insn "*aarch64_get_high<mode>"
4348 [(set (match_operand:<VEL>
0 "aarch64_simd_nonimmediate_operand" "=r")
4350 (match_operand:VQ_2E
1 "register_operand" "w")
4351 (parallel [(match_operand:SI
2 "immediate_operand")])))]
4352 "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[
2])) ==
1"
4354 [(set_attr "type" "f_mrc")]
4357 (define_insn "load_pair_lanes<mode>"
4358 [(set (match_operand:<VDBL>
0 "register_operand" "=w")
4360 (match_operand:VDCSIF
1 "memory_operand" "Utq")
4361 (match_operand:VDCSIF
2 "memory_operand" "m")))]
4363 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[
1], operands[
2])"
4364 "ldr
\\t%<single_dtype>
0, %
1"
4365 [(set_attr "type" "neon_load1_1reg<dblq>")]
;; This STP pattern is a partial duplicate of the general vec_concat patterns
;; below.  The reason for having both of them is that the alternatives of
;; the later patterns do not have consistent register preferences: the STP
;; alternatives have no preference between GPRs and FPRs (and if anything,
;; the GPR form is more natural for scalar integers) whereas the other
;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
;;
;; Using "*" to hide the STP alternatives from the RA penalizes cases in
;; which the destination was always memory.  On the other hand, expressing
;; the true preferences makes GPRs seem more palatable than they really are
;; for register destinations.
;;
;; Despite that, we do still want the general form to have STP alternatives,
;; in order to handle cases where a register destination is spilled.
;;
;; The best compromise therefore seemed to be to have a dedicated STP
;; pattern to catch cases in which the destination was always memory.
;; This dedicated pattern must come first.
4387 (define_insn "store_pair_lanes<mode>"
4388 [(set (match_operand:<VDBL>
0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
4390 (match_operand:VDCSIF
1 "register_operand" "w, r")
4391 (match_operand:VDCSIF
2 "register_operand" "w, r")))]
4394 stp
\t%<single_type>
1, %<single_type>
2, %y0
4395 stp
\t%<single_wx>
1, %<single_wx>
2, %y0"
4396 [(set_attr "type" "neon_stp, store_16")]
;; Form a vector whose least significant half comes from operand 1 and whose
;; most significant half comes from operand 2.  The register alternatives
;; tie the least significant half to the same register as the destination,
;; so that only the other half needs to be handled explicitly.  For the
;; reasons given above, the STP alternatives use ? for constraints that
;; the register alternatives either don't accept or themselves disparage.
4406 (define_insn "*aarch64_combine_internal<mode>"
4407 [(set (match_operand:<VDBL>
0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
4409 (match_operand:VDCSIF
1 "register_operand" "
0,
0,
0,
0, ?w, ?r")
4410 (match_operand:VDCSIF
2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))]
4412 && !BYTES_BIG_ENDIAN
4413 && (register_operand (operands[
0], <VDBL>mode)
4414 || register_operand (operands[
2], <MODE>mode))"
4416 ins
\t%
0.<single_type>[
1], %
2.<single_type>[
0]
4417 ins
\t%
0.<single_type>[
1], %<single_wx>
2
4419 ld1
\t{%
0.<single_type>}[
1], %
2
4420 stp
\t%<single_type>
1, %<single_type>
2, %y0
4421 stp
\t%<single_wx>
1, %<single_wx>
2, %y0"
4422 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr,
4423 neon_load1_one_lane<dblq>, neon_stp, store_16")
4424 (set_attr "arch" "simd,simd,*,simd,*,*")]
4427 (define_insn "*aarch64_combine_internal_be<mode>"
4428 [(set (match_operand:<VDBL>
0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
4430 (match_operand:VDCSIF
2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r")
4431 (match_operand:VDCSIF
1 "register_operand" "
0,
0,
0,
0, ?w, ?r")))]
4434 && (register_operand (operands[
0], <VDBL>mode)
4435 || register_operand (operands[
2], <MODE>mode))"
4437 ins
\t%
0.<single_type>[
1], %
2.<single_type>[
0]
4438 ins
\t%
0.<single_type>[
1], %<single_wx>
2
4440 ld1
\t{%
0.<single_type>}[
1], %
2
4441 stp
\t%<single_type>
2, %<single_type>
1, %y0
4442 stp
\t%<single_wx>
2, %<single_wx>
1, %y0"
4443 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr, neon_load1_one_lane<dblq>, neon_stp, store_16")
4444 (set_attr "arch" "simd,simd,*,simd,*,*")]
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
4450 (define_insn "*aarch64_combinez<mode>"
4451 [(set (match_operand:<VDBL>
0 "register_operand" "=w,w,w")
4453 (match_operand:VDCSIF
1 "nonimmediate_operand" "w,?r,m")
4454 (match_operand:VDCSIF
2 "aarch64_simd_or_scalar_imm_zero")))]
4455 "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4457 fmov
\\t%<single_type>
0, %<single_type>
1
4458 fmov
\t%<single_type>
0, %<single_wx>
1
4459 ldr
\\t%<single_type>
0, %
1"
4460 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
4463 (define_insn "*aarch64_combinez_be<mode>"
4464 [(set (match_operand:<VDBL>
0 "register_operand" "=w,w,w")
4466 (match_operand:VDCSIF
2 "aarch64_simd_or_scalar_imm_zero")
4467 (match_operand:VDCSIF
1 "nonimmediate_operand" "w,?r,m")))]
4468 "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4470 fmov
\\t%<single_type>
0, %<single_type>
1
4471 fmov
\t%<single_type>
0, %<single_wx>
1
4472 ldr
\\t%<single_type>
0, %
1"
4473 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
;; Form a vector whose first half (in array order) comes from operand 1
;; and whose second half (in array order) comes from operand 2.
;; This operand order follows the RTL vec_concat operation.
4479 (define_expand "@aarch64_vec_concat<mode>"
4480 [(set (match_operand:<VDBL>
0 "register_operand")
4482 (match_operand:VDCSIF
1 "general_operand")
4483 (match_operand:VDCSIF
2 "general_operand")))]
4486 int lo = BYTES_BIG_ENDIAN ?
2 :
1;
4487 int hi = BYTES_BIG_ENDIAN ?
1 :
2;
4489 if (MEM_P (operands[
1])
4490 && MEM_P (operands[
2])
4491 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[
1], operands[
2]))
4492 /* Use load_pair_lanes<mode>. */
4494 else if (operands[hi] == CONST0_RTX (<MODE>mode))
4496 /* Use *aarch64_combinez<mode>. */
4497 if (!nonimmediate_operand (operands[lo], <MODE>mode))
4498 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4502 /* Use *aarch64_combine_internal<mode>. */
4503 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4504 if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4506 if (MEM_P (operands[hi]))
4508 rtx addr = force_reg (Pmode, XEXP (operands[hi],
0));
4509 operands[hi] = replace_equiv_address (operands[hi], addr);
4512 operands[hi] = force_reg (<MODE>mode, operands[hi]);
;; Form a vector whose least significant half comes from operand 1 and whose
;; most significant half comes from operand 2.  This operand order follows
;; arm_neon.h vcombine* intrinsics.
4520 (define_expand "aarch64_combine<mode>"
4521 [(match_operand:<VDBL>
0 "register_operand")
4522 (match_operand:VDC
1 "general_operand")
4523 (match_operand:VDC
2 "general_operand")]
4526 if (BYTES_BIG_ENDIAN)
4527 std::swap (operands[
1], operands[
2]);
4528 emit_insn (gen_aarch64_vec_concat<mode> (operands[
0], operands[
1],
4534 ;; <su><addsub>l<q>.
4536 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4537 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4538 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4539 (match_operand:VQW
1 "register_operand" "w")
4540 (match_operand:VQW
3 "vect_par_cnst_hi_half" "")))
4541 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4542 (match_operand:VQW
2 "register_operand" "w")
4545 "<ANY_EXTEND:su><ADDSUB:optab>l2
\t%
0.<Vwtype>, %
1.<Vtype>, %
2.<Vtype>"
4546 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4549 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4550 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4551 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4552 (match_operand:VQW
1 "register_operand" "w")
4553 (match_operand:VQW
3 "vect_par_cnst_lo_half" "")))
4554 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4555 (match_operand:VQW
2 "register_operand" "w")
4558 "<ANY_EXTEND:su><ADDSUB:optab>l
\t%
0.<Vwtype>, %
1.<Vhalftype>, %
2.<Vhalftype>"
4559 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4562 (define_expand "vec_widen_<su>add_lo_<mode>"
4563 [(match_operand:<VWIDE>
0 "register_operand")
4564 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))
4565 (ANY_EXTEND:<VWIDE> (match_operand:VQW
2 "register_operand"))]
4568 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4569 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[
0], operands[
1],
4574 (define_expand "vec_widen_<su>add_hi_<mode>"
4575 [(match_operand:<VWIDE>
0 "register_operand")
4576 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))
4577 (ANY_EXTEND:<VWIDE> (match_operand:VQW
2 "register_operand"))]
4580 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4581 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[
0], operands[
1],
4586 (define_expand "vec_widen_<su>sub_lo_<mode>"
4587 [(match_operand:<VWIDE>
0 "register_operand")
4588 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))
4589 (ANY_EXTEND:<VWIDE> (match_operand:VQW
2 "register_operand"))]
4592 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4593 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[
0], operands[
1],
4598 (define_expand "vec_widen_<su>sub_hi_<mode>"
4599 [(match_operand:<VWIDE>
0 "register_operand")
4600 (ANY_EXTEND:<VWIDE> (match_operand:VQW
1 "register_operand"))
4601 (ANY_EXTEND:<VWIDE> (match_operand:VQW
2 "register_operand"))]
4604 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4605 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[
0], operands[
1],
4610 (define_expand "aarch64_saddl2<mode>"
4611 [(match_operand:<VWIDE>
0 "register_operand")
4612 (match_operand:VQW
1 "register_operand")
4613 (match_operand:VQW
2 "register_operand")]
4616 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4617 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[
0], operands[
1],
4622 (define_expand "aarch64_uaddl2<mode>"
4623 [(match_operand:<VWIDE>
0 "register_operand")
4624 (match_operand:VQW
1 "register_operand")
4625 (match_operand:VQW
2 "register_operand")]
4628 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4629 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[
0], operands[
1],
4634 (define_expand "aarch64_ssubl2<mode>"
4635 [(match_operand:<VWIDE>
0 "register_operand")
4636 (match_operand:VQW
1 "register_operand")
4637 (match_operand:VQW
2 "register_operand")]
4640 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4641 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[
0], operands[
1],
4646 (define_expand "aarch64_usubl2<mode>"
4647 [(match_operand:<VWIDE>
0 "register_operand")
4648 (match_operand:VQW
1 "register_operand")
4649 (match_operand:VQW
2 "register_operand")]
4652 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4653 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[
0], operands[
1],
4658 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4659 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4660 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4661 (match_operand:VD_BHSI
1 "register_operand" "w"))
4663 (match_operand:VD_BHSI
2 "register_operand" "w"))))]
4665 "<ANY_EXTEND:su><ADDSUB:optab>l
\t%
0.<Vwtype>, %
1.<Vtype>, %
2.<Vtype>"
4666 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4669 ;; <su><addsub>w<q>.
4671 (define_expand "widen_ssum<mode>
3"
4672 [(set (match_operand:<VDBLW>
0 "register_operand")
4673 (plus:<VDBLW> (sign_extend:<VDBLW>
4674 (match_operand:VQW
1 "register_operand"))
4675 (match_operand:<VDBLW>
2 "register_operand")))]
4678 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4679 rtx temp = gen_reg_rtx (GET_MODE (operands[
0]));
4681 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[
2],
4683 emit_insn (gen_aarch64_saddw2<mode> (operands[
0], temp, operands[
1]));
4688 (define_expand "widen_ssum<mode>
3"
4689 [(set (match_operand:<VWIDE>
0 "register_operand")
4690 (plus:<VWIDE> (sign_extend:<VWIDE>
4691 (match_operand:VD_BHSI
1 "register_operand"))
4692 (match_operand:<VWIDE>
2 "register_operand")))]
4695 emit_insn (gen_aarch64_saddw<mode> (operands[
0], operands[
2], operands[
1]));
4699 (define_expand "widen_usum<mode>
3"
4700 [(set (match_operand:<VDBLW>
0 "register_operand")
4701 (plus:<VDBLW> (zero_extend:<VDBLW>
4702 (match_operand:VQW
1 "register_operand"))
4703 (match_operand:<VDBLW>
2 "register_operand")))]
4706 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4707 rtx temp = gen_reg_rtx (GET_MODE (operands[
0]));
4709 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[
2],
4711 emit_insn (gen_aarch64_uaddw2<mode> (operands[
0], temp, operands[
1]));
4716 (define_expand "widen_usum<mode>
3"
4717 [(set (match_operand:<VWIDE>
0 "register_operand")
4718 (plus:<VWIDE> (zero_extend:<VWIDE>
4719 (match_operand:VD_BHSI
1 "register_operand"))
4720 (match_operand:<VWIDE>
2 "register_operand")))]
4723 emit_insn (gen_aarch64_uaddw<mode> (operands[
0], operands[
2], operands[
1]));
4727 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4728 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4729 (minus:<VWIDE> (match_operand:<VWIDE>
1 "register_operand" "w")
4731 (match_operand:VD_BHSI
2 "register_operand" "w"))))]
4733 "<ANY_EXTEND:su>subw
\\t%
0.<Vwtype>, %
1.<Vwtype>, %
2.<Vtype>"
4734 [(set_attr "type" "neon_sub_widen")]
4737 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4738 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4739 (minus:<VWIDE> (match_operand:<VWIDE>
1 "register_operand" "w")
4742 (match_operand:VQW
2 "register_operand" "w")
4743 (match_operand:VQW
3 "vect_par_cnst_lo_half" "")))))]
4745 "<ANY_EXTEND:su>subw
\\t%
0.<Vwtype>, %
1.<Vwtype>, %
2.<Vhalftype>"
4746 [(set_attr "type" "neon_sub_widen")]
4749 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4750 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4751 (minus:<VWIDE> (match_operand:<VWIDE>
1 "register_operand" "w")
4754 (match_operand:VQW
2 "register_operand" "w")
4755 (match_operand:VQW
3 "vect_par_cnst_hi_half" "")))))]
4757 "<ANY_EXTEND:su>subw2
\\t%
0.<Vwtype>, %
1.<Vwtype>, %
2.<Vtype>"
4758 [(set_attr "type" "neon_sub_widen")]
4761 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4762 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4764 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI
2 "register_operand" "w"))
4765 (match_operand:<VWIDE>
1 "register_operand" "w")))]
4767 "<ANY_EXTEND:su>addw
\\t%
0.<Vwtype>, %
1.<Vwtype>, %
2.<Vtype>"
4768 [(set_attr "type" "neon_add_widen")]
4771 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4772 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4776 (match_operand:VQW
2 "register_operand" "w")
4777 (match_operand:VQW
3 "vect_par_cnst_lo_half" "")))
4778 (match_operand:<VWIDE>
1 "register_operand" "w")))]
4780 "<ANY_EXTEND:su>addw
\\t%
0.<Vwtype>, %
1.<Vwtype>, %
2.<Vhalftype>"
4781 [(set_attr "type" "neon_add_widen")]
4784 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4785 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
4789 (match_operand:VQW
2 "register_operand" "w")
4790 (match_operand:VQW
3 "vect_par_cnst_hi_half" "")))
4791 (match_operand:<VWIDE>
1 "register_operand" "w")))]
4793 "<ANY_EXTEND:su>addw2
\\t%
0.<Vwtype>, %
1.<Vwtype>, %
2.<Vtype>"
4794 [(set_attr "type" "neon_add_widen")]
4797 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4798 [(set (match_operand:<VWIDE>
0 "register_operand")
4802 (match_operand:VQW
2 "register_operand")
4804 (match_operand:<VWIDE>
1 "register_operand")))]
4807 /* We still do an emit_insn rather than relying on the pattern above
4808 because for the MINUS case the operands would need to be swapped
4811 = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4812 emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4820 ;; <su><r>h<addsub>.
4822 (define_expand "<su_optab>avg<mode>
3_floor"
4823 [(set (match_operand:VDQ_BHSI
0 "register_operand")
4827 (ANY_EXTEND:<V2XWIDE>
4828 (match_operand:VDQ_BHSI
1 "register_operand"))
4829 (ANY_EXTEND:<V2XWIDE>
4830 (match_operand:VDQ_BHSI
2 "register_operand")))
4834 operands[
3] = CONST1_RTX (<V2XWIDE>mode);
4838 (define_expand "<su_optab>avg<mode>
3_ceil"
4839 [(set (match_operand:VDQ_BHSI
0 "register_operand")
4844 (ANY_EXTEND:<V2XWIDE>
4845 (match_operand:VDQ_BHSI
1 "register_operand"))
4846 (ANY_EXTEND:<V2XWIDE>
4847 (match_operand:VDQ_BHSI
2 "register_operand")))
4852 operands[
3] = CONST1_RTX (<V2XWIDE>mode);
4856 (define_expand "aarch64_<su>hsub<mode>"
4857 [(set (match_operand:VDQ_BHSI
0 "register_operand")
4861 (ANY_EXTEND:<V2XWIDE>
4862 (match_operand:VDQ_BHSI
1 "register_operand"))
4863 (ANY_EXTEND:<V2XWIDE>
4864 (match_operand:VDQ_BHSI
2 "register_operand")))
4868 operands[
3] = CONST1_RTX (<V2XWIDE>mode);
4872 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4873 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
4877 (ANY_EXTEND:<V2XWIDE>
4878 (match_operand:VDQ_BHSI
1 "register_operand" "w"))
4879 (ANY_EXTEND:<V2XWIDE>
4880 (match_operand:VDQ_BHSI
2 "register_operand" "w")))
4881 (match_operand:<V2XWIDE>
3 "aarch64_simd_imm_one"))))]
4883 "<su>h<ADDSUB:optab>
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
4884 [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4887 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4888 [(set (match_operand:VDQ_BHSI
0 "register_operand" "=w")
4893 (ANY_EXTEND:<V2XWIDE>
4894 (match_operand:VDQ_BHSI
1 "register_operand" "w"))
4895 (ANY_EXTEND:<V2XWIDE>
4896 (match_operand:VDQ_BHSI
2 "register_operand" "w")))
4897 (match_operand:<V2XWIDE>
3 "aarch64_simd_imm_one"))
4900 "<su>rhadd
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
4901 [(set_attr "type" "neon_add_halve<q>")]
4904 ;; <r><addsub>hn<q>.
4906 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4907 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
4908 (truncate:<VNARROWQ>
4910 (ADDSUB:VQN (match_operand:VQN
1 "register_operand" "w")
4911 (match_operand:VQN
2 "register_operand" "w"))
4912 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4914 "<optab>hn
\\t%
0.<Vntype>, %
1.<Vtype>, %
2.<Vtype>"
4915 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4918 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4919 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
4920 (truncate:<VNARROWQ>
4923 (ADDSUB:VQN (match_operand:VQN
1 "register_operand" "w")
4924 (match_operand:VQN
2 "register_operand" "w"))
4925 (match_operand:VQN
3 "aarch64_simd_raddsubhn_imm_vec"))
4926 (match_operand:VQN
4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4928 "r<optab>hn
\\t%
0.<Vntype>, %
1.<Vtype>, %
2.<Vtype>"
4929 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4932 (define_expand "aarch64_<optab>hn<mode>"
4933 [(set (match_operand:<VNARROWQ>
0 "register_operand")
4934 (ADDSUB:VQN (match_operand:VQN
1 "register_operand")
4935 (match_operand:VQN
2 "register_operand")))]
4939 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4940 GET_MODE_UNIT_BITSIZE (<MODE>mode) /
2);
4941 emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[
0], operands[
1],
4942 operands[
2], shft));
4947 (define_expand "aarch64_r<optab>hn<mode>"
4948 [(set (match_operand:<VNARROWQ>
0 "register_operand")
4949 (ADDSUB:VQN (match_operand:VQN
1 "register_operand")
4950 (match_operand:VQN
2 "register_operand")))]
4954 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4955 GET_MODE_UNIT_BITSIZE (<MODE>mode) /
2);
4957 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4958 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) /
2 -
1));
4959 emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[
0], operands[
1],
4960 operands[
2], rnd, shft));
4965 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4966 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
4967 (vec_concat:<VNARROWQ2>
4968 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
4969 (truncate:<VNARROWQ>
4971 (ADDSUB:VQN (match_operand:VQN
2 "register_operand" "w")
4972 (match_operand:VQN
3 "register_operand" "w"))
4973 (match_operand:VQN
4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4974 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4975 "<optab>hn2
\\t%
0.<V2ntype>, %
2.<Vtype>, %
3.<Vtype>"
4976 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4979 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4980 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
4981 (vec_concat:<VNARROWQ2>
4982 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
4983 (truncate:<VNARROWQ>
4986 (ADDSUB:VQN (match_operand:VQN
2 "register_operand" "w")
4987 (match_operand:VQN
3 "register_operand" "w"))
4988 (match_operand:VQN
4 "aarch64_simd_raddsubhn_imm_vec"))
4989 (match_operand:VQN
5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4990 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4991 "r<optab>hn2
\\t%
0.<V2ntype>, %
2.<Vtype>, %
3.<Vtype>"
4992 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4995 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4996 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
4997 (vec_concat:<VNARROWQ2>
4998 (truncate:<VNARROWQ>
5000 (ADDSUB:VQN (match_operand:VQN
2 "register_operand" "w")
5001 (match_operand:VQN
3 "register_operand" "w"))
5002 (match_operand:VQN
4 "aarch64_simd_shift_imm_vec_exact_top")))
5003 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
5004 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5005 "<optab>hn2
\\t%
0.<V2ntype>, %
2.<Vtype>, %
3.<Vtype>"
5006 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
5009 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
5010 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
5011 (vec_concat:<VNARROWQ2>
5012 (truncate:<VNARROWQ>
5015 (ADDSUB:VQN (match_operand:VQN
2 "register_operand" "w")
5016 (match_operand:VQN
3 "register_operand" "w"))
5017 (match_operand:VQN
4 "aarch64_simd_raddsubhn_imm_vec"))
5018 (match_operand:VQN
5 "aarch64_simd_shift_imm_vec_exact_top")))
5019 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
5020 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5021 "r<optab>hn2
\\t%
0.<V2ntype>, %
2.<Vtype>, %
3.<Vtype>"
5022 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
5025 (define_expand "aarch64_<optab>hn2<mode>"
5026 [(match_operand:<VNARROWQ2>
0 "register_operand")
5027 (match_operand:<VNARROWQ>
1 "register_operand")
5028 (ADDSUB:VQN (match_operand:VQN
2 "register_operand")
5029 (match_operand:VQN
3 "register_operand"))]
5033 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5034 GET_MODE_UNIT_BITSIZE (<MODE>mode) /
2);
5035 if (BYTES_BIG_ENDIAN)
5036 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[
0],
5037 operands[
1], operands[
2], operands[
3], shft));
5039 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[
0],
5040 operands[
1], operands[
2], operands[
3], shft));
5045 (define_expand "aarch64_r<optab>hn2<mode>"
5046 [(match_operand:<VNARROWQ2>
0 "register_operand")
5047 (match_operand:<VNARROWQ>
1 "register_operand")
5048 (ADDSUB:VQN (match_operand:VQN
2 "register_operand")
5049 (match_operand:VQN
3 "register_operand"))]
5053 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5054 GET_MODE_UNIT_BITSIZE (<MODE>mode) /
2);
5056 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5057 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) /
2 -
1));
5058 if (BYTES_BIG_ENDIAN)
5059 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[
0],
5060 operands[
1], operands[
2], operands[
3], rnd, shft));
5062 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[
0],
5063 operands[
1], operands[
2], operands[
3], rnd, shft));
5068 ;; Optimize ((a + b) >> n) + c where n is half the bitsize of the vector
5069 (define_insn_and_split "*bitmask_shift_plus<mode>"
5070 [(set (match_operand:VQN
0 "register_operand" "=&w")
5073 (plus:VQN (match_operand:VQN
1 "register_operand" "w")
5074 (match_operand:VQN
2 "register_operand" "w"))
5075 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5076 (match_operand:VQN
4 "register_operand" "w")))]
5083 if (can_create_pseudo_p ())
5084 tmp = gen_reg_rtx (<VNARROWQ>mode);
5086 tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[
0]));
5087 emit_insn (gen_aarch64_addhn<mode> (tmp, operands[
1], operands[
2]));
5088 emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[
0], operands[
4], tmp));
5094 (define_insn "aarch64_pmul<mode>"
5095 [(set (match_operand:VB
0 "register_operand" "=w")
5096 (unspec:VB [(match_operand:VB
1 "register_operand" "w")
5097 (match_operand:VB
2 "register_operand" "w")]
5100 "pmul
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
5101 [(set_attr "type" "neon_mul_<Vetype><q>")]
5104 (define_insn "aarch64_pmullv8qi"
5105 [(set (match_operand:V8HI
0 "register_operand" "=w")
5106 (unspec:V8HI [(match_operand:V8QI
1 "register_operand" "w")
5107 (match_operand:V8QI
2 "register_operand" "w")]
5110 "pmull
\\t%
0.8h, %
1.8b, %
2.8b"
5111 [(set_attr "type" "neon_mul_b_long")]
5114 (define_insn "aarch64_pmull_hiv16qi_insn"
5115 [(set (match_operand:V8HI
0 "register_operand" "=w")
5118 (match_operand:V16QI
1 "register_operand" "w")
5119 (match_operand:V16QI
3 "vect_par_cnst_hi_half" ""))
5121 (match_operand:V16QI
2 "register_operand" "w")
5125 "pmull2
\\t%
0.8h, %
1.16b, %
2.16b"
5126 [(set_attr "type" "neon_mul_b_long")]
5129 (define_expand "aarch64_pmull_hiv16qi"
5130 [(match_operand:V8HI
0 "register_operand")
5131 (match_operand:V16QI
1 "register_operand")
5132 (match_operand:V16QI
2 "register_operand")]
5135 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode,
16, true);
5136 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[
0], operands[
1],
5144 (define_insn "aarch64_fmulx<mode>"
5145 [(set (match_operand:VHSDF_HSDF
0 "register_operand" "=w")
5147 [(match_operand:VHSDF_HSDF
1 "register_operand" "w")
5148 (match_operand:VHSDF_HSDF
2 "register_operand" "w")]
5151 "fmulx
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
5152 [(set_attr "type" "neon_fp_mul_<stype>")]
5155 ;; vmulxq_lane_f32, and vmulx_laneq_f32
5157 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5158 [(set (match_operand:VDQSF
0 "register_operand" "=w")
5160 [(match_operand:VDQSF
1 "register_operand" "w")
5161 (vec_duplicate:VDQSF
5163 (match_operand:<VSWAP_WIDTH>
2 "register_operand" "w")
5164 (parallel [(match_operand:SI
3 "immediate_operand" "i")])))]
5168 operands[
3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[
3]));
5169 return "fmulx
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
5171 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5174 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5176 (define_insn "*aarch64_mulx_elt<mode>"
5177 [(set (match_operand:VDQF
0 "register_operand" "=w")
5179 [(match_operand:VDQF
1 "register_operand" "w")
5182 (match_operand:VDQF
2 "register_operand" "w")
5183 (parallel [(match_operand:SI
3 "immediate_operand" "i")])))]
5187 operands[
3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
3]));
5188 return "fmulx
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
5190 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5195 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5196 [(set (match_operand:VHSDF
0 "register_operand" "=w")
5198 [(match_operand:VHSDF
1 "register_operand" "w")
5199 (vec_duplicate:VHSDF
5200 (match_operand:<VEL>
2 "register_operand" "<h_con>"))]
5203 "fmulx
\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vetype>[
0]";
5204 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5207 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5208 ;; vmulxd_lane_f64 == vmulx_lane_f64
5209 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5211 (define_insn "*aarch64_vgetfmulx<mode>"
5212 [(set (match_operand:<VEL>
0 "register_operand" "=w")
5214 [(match_operand:<VEL>
1 "register_operand" "w")
5216 (match_operand:VDQF
2 "register_operand" "w")
5217 (parallel [(match_operand:SI
3 "immediate_operand" "i")]))]
5221 operands[
3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
3]));
5222 return "fmulx
\t%<Vetype>
0, %<Vetype>
1, %
2.<Vetype>[%
3]";
5224 [(set_attr "type" "fmul<Vetype>")]
5228 (define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
5229 [(set (match_operand:VSDQ_I
0 "register_operand" "=w")
5230 (BINQOPS:VSDQ_I (match_operand:VSDQ_I
1 "register_operand" "w")
5231 (match_operand:VSDQ_I
2 "register_operand" "w")))]
5233 "<su_optab>q<addsub>
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
5234 [(set_attr "type" "neon_q<addsub><q>")]
5237 ;; suqadd and usqadd
5239 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5240 [(set (match_operand:VSDQ_I
0 "register_operand" "=w")
5241 (unspec:VSDQ_I [(match_operand:VSDQ_I
1 "register_operand" "
0")
5242 (match_operand:VSDQ_I
2 "register_operand" "w")]
5245 "<sur>qadd
\\t%<v>
0<Vmtype>, %<v>
2<Vmtype>"
5246 [(set_attr "type" "neon_qadd<q>")]
5249 ;; sqmovn and uqmovn
5251 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5252 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
5253 (SAT_TRUNC:<VNARROWQ>
5254 (match_operand:SD_HSDI
1 "register_operand" "w")))]
5256 "<su>qxtn
\\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>"
5257 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5260 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5261 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
5262 (SAT_TRUNC:<VNARROWQ>
5263 (match_operand:VQN
1 "register_operand" "w")))]
5265 "<su>qxtn
\\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>"
5266 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5269 (define_insn "aarch64_<su>qxtn2<mode>_le"
5270 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
5271 (vec_concat:<VNARROWQ2>
5272 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
5273 (SAT_TRUNC:<VNARROWQ>
5274 (match_operand:VQN
2 "register_operand" "w"))))]
5275 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5276 "<su>qxtn2
\\t%
0.<V2ntype>, %
2.<Vtype>"
5277 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5280 (define_insn "aarch64_<su>qxtn2<mode>_be"
5281 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
5282 (vec_concat:<VNARROWQ2>
5283 (SAT_TRUNC:<VNARROWQ>
5284 (match_operand:VQN
2 "register_operand" "w"))
5285 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
5286 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5287 "<su>qxtn2
\\t%
0.<V2ntype>, %
2.<Vtype>"
5288 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5291 (define_expand "aarch64_<su>qxtn2<mode>"
5292 [(match_operand:<VNARROWQ2>
0 "register_operand")
5293 (match_operand:<VNARROWQ>
1 "register_operand")
5294 (SAT_TRUNC:<VNARROWQ>
5295 (match_operand:VQN
2 "register_operand"))]
5298 if (BYTES_BIG_ENDIAN)
5299 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[
0], operands[
1],
5302 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[
0], operands[
1],
5310 (define_insn "aarch64_sqmovun<mode>"
5311 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
5312 (truncate:<VNARROWQ>
5315 (match_operand:SD_HSDI
1 "register_operand" "w")
5317 (const_int <half_mask>))))]
5319 "sqxtun
\\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>"
5320 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5323 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5324 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
5325 (truncate:<VNARROWQ>
5327 (smax:VQN (match_operand:VQN
1 "register_operand" "w")
5328 (match_operand:VQN
2 "aarch64_simd_or_scalar_imm_zero"))
5329 (match_operand:VQN
3 "aarch64_simd_umax_half_mode"))))]
5331 "sqxtun
\\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>"
5332 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5335 (define_expand "aarch64_sqmovun<mode>"
5336 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
5337 (truncate:<VNARROWQ>
5339 (smax:VQN (match_operand:VQN
1 "register_operand" "w")
5344 operands[
2] = CONST0_RTX (<MODE>mode);
5346 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5347 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5351 (define_insn "aarch64_sqxtun2<mode>_le"
5352 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
5353 (vec_concat:<VNARROWQ2>
5354 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
5355 (truncate:<VNARROWQ>
5358 (match_operand:VQN
2 "register_operand" "w")
5359 (match_operand:VQN
3 "aarch64_simd_or_scalar_imm_zero"))
5360 (match_operand:VQN
4 "aarch64_simd_umax_half_mode")))))]
5361 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5362 "sqxtun2
\\t%
0.<V2ntype>, %
2.<Vtype>"
5363 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5366 (define_insn "aarch64_sqxtun2<mode>_be"
5367 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
5368 (vec_concat:<VNARROWQ2>
5369 (truncate:<VNARROWQ>
5372 (match_operand:VQN
2 "register_operand" "w")
5373 (match_operand:VQN
3 "aarch64_simd_or_scalar_imm_zero"))
5374 (match_operand:VQN
4 "aarch64_simd_umax_half_mode")))
5375 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
5376 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5377 "sqxtun2
\\t%
0.<V2ntype>, %
2.<Vtype>"
5378 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5381 (define_expand "aarch64_sqxtun2<mode>"
5382 [(match_operand:<VNARROWQ2>
0 "register_operand")
5383 (match_operand:<VNARROWQ>
1 "register_operand")
5384 (match_operand:VQN
2 "register_operand")]
5387 rtx zeros = CONST0_RTX (<MODE>mode);
5388 rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5389 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5390 if (BYTES_BIG_ENDIAN)
5391 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[
0], operands[
1],
5392 operands[
2], zeros, half_umax));
5394 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[
0], operands[
1],
5395 operands[
2], zeros, half_umax));
5402 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5403 [(set (match_operand:VSDQ_I
0 "register_operand" "=w")
5405 (match_operand:VSDQ_I
1 "register_operand" "w")))]
5407 "s<optab>
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>"
5408 [(set_attr "type" "neon_<optab><q>")]
5413 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5414 [(set (match_operand:VSDQ_HSI
0 "register_operand" "=w")
5416 [(match_operand:VSDQ_HSI
1 "register_operand" "w")
5417 (match_operand:VSDQ_HSI
2 "register_operand" "w")]
5420 "sq<r>dmulh
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
5421 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5424 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5425 [(set (match_operand:VDQHS
0 "register_operand" "=w")
5427 [(match_operand:VDQHS
1 "register_operand" "w")
5428 (vec_duplicate:VDQHS
5429 (match_operand:<VEL>
2 "register_operand" "<h_con>"))]
5432 "sq<r>dmulh
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vetype>[
0]"
5433 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5438 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5439 [(set (match_operand:VDQHS
0 "register_operand" "=w")
5441 [(match_operand:VDQHS
1 "register_operand" "w")
5443 (match_operand:<VCOND>
2 "register_operand" "<vwx>")
5444 (parallel [(match_operand:SI
3 "immediate_operand" "i")]))]
5448 operands[
3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
3]));
5449 return
\"sq<r>dmulh
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vetype>[%
3]
\";"
5450 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5453 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5454 [(set (match_operand:VDQHS
0 "register_operand" "=w")
5456 [(match_operand:VDQHS
1 "register_operand" "w")
5458 (match_operand:<VCONQ>
2 "register_operand" "<vwx>")
5459 (parallel [(match_operand:SI
3 "immediate_operand" "i")]))]
5463 operands[
3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
3]));
5464 return
\"sq<r>dmulh
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vetype>[%
3]
\";"
5465 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5468 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5469 [(set (match_operand:SD_HSI
0 "register_operand" "=w")
5471 [(match_operand:SD_HSI
1 "register_operand" "w")
5473 (match_operand:<VCOND>
2 "register_operand" "<vwx>")
5474 (parallel [(match_operand:SI
3 "immediate_operand" "i")]))]
5478 operands[
3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
3]));
5479 return
\"sq<r>dmulh
\\t%<v>
0, %<v>
1, %
2.<v>[%
3]
\";"
5480 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5483 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5484 [(set (match_operand:SD_HSI
0 "register_operand" "=w")
5486 [(match_operand:SD_HSI
1 "register_operand" "w")
5488 (match_operand:<VCONQ>
2 "register_operand" "<vwx>")
5489 (parallel [(match_operand:SI
3 "immediate_operand" "i")]))]
5493 operands[
3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
3]));
5494 return
\"sq<r>dmulh
\\t%<v>
0, %<v>
1, %
2.<v>[%
3]
\";"
5495 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5500 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5501 [(set (match_operand:VSDQ_HSI
0 "register_operand" "=w")
5503 [(match_operand:VSDQ_HSI
1 "register_operand" "
0")
5504 (match_operand:VSDQ_HSI
2 "register_operand" "w")
5505 (match_operand:VSDQ_HSI
3 "register_operand" "w")]
5508 "sqrdml<SQRDMLH_AS:rdma_as>h
\\t%<v>
0<Vmtype>, %<v>
2<Vmtype>, %<v>
3<Vmtype>"
5509 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5512 ;; sqrdml[as]h_lane.
5514 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5515 [(set (match_operand:VDQHS
0 "register_operand" "=w")
5517 [(match_operand:VDQHS
1 "register_operand" "
0")
5518 (match_operand:VDQHS
2 "register_operand" "w")
5520 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5521 (parallel [(match_operand:SI
4 "immediate_operand" "i")]))]
5525 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5527 "sqrdml<SQRDMLH_AS:rdma_as>h
\\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vetype>[%
4]";
5529 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5532 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5533 [(set (match_operand:SD_HSI
0 "register_operand" "=w")
5535 [(match_operand:SD_HSI
1 "register_operand" "
0")
5536 (match_operand:SD_HSI
2 "register_operand" "w")
5538 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5539 (parallel [(match_operand:SI
4 "immediate_operand" "i")]))]
5543 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5545 "sqrdml<SQRDMLH_AS:rdma_as>h
\\t%<v>
0, %<v>
2, %
3.<Vetype>[%
4]";
5547 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5550 ;; sqrdml[as]h_laneq.
5552 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5553 [(set (match_operand:VDQHS
0 "register_operand" "=w")
5555 [(match_operand:VDQHS
1 "register_operand" "
0")
5556 (match_operand:VDQHS
2 "register_operand" "w")
5558 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
5559 (parallel [(match_operand:SI
4 "immediate_operand" "i")]))]
5563 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
5565 "sqrdml<SQRDMLH_AS:rdma_as>h
\\t%
0.<Vtype>, %
2.<Vtype>, %
3.<Vetype>[%
4]";
5567 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5570 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5571 [(set (match_operand:SD_HSI
0 "register_operand" "=w")
5573 [(match_operand:SD_HSI
1 "register_operand" "
0")
5574 (match_operand:SD_HSI
2 "register_operand" "w")
5576 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
5577 (parallel [(match_operand:SI
4 "immediate_operand" "i")]))]
5581 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
5583 "sqrdml<SQRDMLH_AS:rdma_as>h
\\t%<v>
0, %<v>
2, %
3.<v>[%
4]";
5585 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5590 (define_insn "aarch64_sqdmlal<mode>"
5591 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5595 (sign_extend:<VWIDE>
5596 (match_operand:VSD_HSI
2 "register_operand" "w"))
5597 (sign_extend:<VWIDE>
5598 (match_operand:VSD_HSI
3 "register_operand" "w")))
5600 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5602 "sqdmlal
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %<v>
3<Vmtype>"
5603 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5606 (define_insn "aarch64_sqdmlsl<mode>"
5607 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5609 (match_operand:<VWIDE>
1 "register_operand" "
0")
5612 (sign_extend:<VWIDE>
5613 (match_operand:VSD_HSI
2 "register_operand" "w"))
5614 (sign_extend:<VWIDE>
5615 (match_operand:VSD_HSI
3 "register_operand" "w")))
5618 "sqdmlsl
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %<v>
3<Vmtype>"
5619 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5624 (define_insn "aarch64_sqdmlal_lane<mode>"
5625 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5629 (sign_extend:<VWIDE>
5630 (match_operand:VD_HSI
2 "register_operand" "w"))
5631 (vec_duplicate:<VWIDE>
5632 (sign_extend:<VWIDE_S>
5634 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5635 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5638 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5641 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5643 "sqdmlal
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5645 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5648 (define_insn "aarch64_sqdmlsl_lane<mode>"
5649 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5651 (match_operand:<VWIDE>
1 "register_operand" "
0")
5654 (sign_extend:<VWIDE>
5655 (match_operand:VD_HSI
2 "register_operand" "w"))
5656 (vec_duplicate:<VWIDE>
5657 (sign_extend:<VWIDE_S>
5659 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5660 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5665 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5667 "sqdmlsl
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5669 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5673 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5674 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5676 (match_operand:<VWIDE>
1 "register_operand" "
0")
5679 (sign_extend:<VWIDE>
5680 (match_operand:VD_HSI
2 "register_operand" "w"))
5681 (vec_duplicate:<VWIDE>
5682 (sign_extend:<VWIDE_S>
5684 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
5685 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5690 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
5692 "sqdmlsl
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5694 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5697 (define_insn "aarch64_sqdmlal_laneq<mode>"
5698 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5702 (sign_extend:<VWIDE>
5703 (match_operand:VD_HSI
2 "register_operand" "w"))
5704 (vec_duplicate:<VWIDE>
5705 (sign_extend:<VWIDE_S>
5707 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
5708 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5711 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5714 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
5716 "sqdmlal
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5718 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5722 (define_insn "aarch64_sqdmlal_lane<mode>"
5723 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5727 (sign_extend:<VWIDE>
5728 (match_operand:SD_HSI
2 "register_operand" "w"))
5729 (sign_extend:<VWIDE>
5731 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5732 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5735 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5738 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5740 "sqdmlal
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5742 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5745 (define_insn "aarch64_sqdmlsl_lane<mode>"
5746 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5748 (match_operand:<VWIDE>
1 "register_operand" "
0")
5751 (sign_extend:<VWIDE>
5752 (match_operand:SD_HSI
2 "register_operand" "w"))
5753 (sign_extend:<VWIDE>
5755 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5756 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5761 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5763 "sqdmlsl
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5765 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5769 (define_insn "aarch64_sqdmlal_laneq<mode>"
5770 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5774 (sign_extend:<VWIDE>
5775 (match_operand:SD_HSI
2 "register_operand" "w"))
5776 (sign_extend:<VWIDE>
5778 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
5779 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5782 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5785 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
5787 "sqdmlal
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5789 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5792 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5793 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5795 (match_operand:<VWIDE>
1 "register_operand" "
0")
5798 (sign_extend:<VWIDE>
5799 (match_operand:SD_HSI
2 "register_operand" "w"))
5800 (sign_extend:<VWIDE>
5802 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
5803 (parallel [(match_operand:SI
4 "immediate_operand" "i")])))
5808 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
5810 "sqdmlsl
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5812 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5817 (define_insn "aarch64_sqdmlsl_n<mode>"
5818 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5820 (match_operand:<VWIDE>
1 "register_operand" "
0")
5823 (sign_extend:<VWIDE>
5824 (match_operand:VD_HSI
2 "register_operand" "w"))
5825 (vec_duplicate:<VWIDE>
5826 (sign_extend:<VWIDE_S>
5827 (match_operand:<VEL>
3 "register_operand" "<vwx>"))))
5830 "sqdmlsl
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[
0]"
5831 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5834 (define_insn "aarch64_sqdmlal_n<mode>"
5835 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5839 (sign_extend:<VWIDE>
5840 (match_operand:VD_HSI
2 "register_operand" "w"))
5841 (vec_duplicate:<VWIDE>
5842 (sign_extend:<VWIDE_S>
5843 (match_operand:<VEL>
3 "register_operand" "<vwx>"))))
5845 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5847 "sqdmlal
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[
0]"
5848 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5854 (define_insn "aarch64_sqdmlal2<mode>_internal"
5855 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5859 (sign_extend:<VWIDE>
5861 (match_operand:VQ_HSI
2 "register_operand" "w")
5862 (match_operand:VQ_HSI
4 "vect_par_cnst_hi_half" "")))
5863 (sign_extend:<VWIDE>
5865 (match_operand:VQ_HSI
3 "register_operand" "w")
5868 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5870 "sqdmlal2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %<v>
3<Vmtype>"
5871 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5874 (define_insn "aarch64_sqdmlsl2<mode>_internal"
5875 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5877 (match_operand:<VWIDE>
1 "register_operand" "
0")
5880 (sign_extend:<VWIDE>
5882 (match_operand:VQ_HSI
2 "register_operand" "w")
5883 (match_operand:VQ_HSI
4 "vect_par_cnst_hi_half" "")))
5884 (sign_extend:<VWIDE>
5886 (match_operand:VQ_HSI
3 "register_operand" "w")
5890 "sqdmlsl2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %<v>
3<Vmtype>"
5891 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5894 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
5895 [(match_operand:<VWIDE>
0 "register_operand")
5897 (match_operand:<VWIDE>
1 "register_operand")
5899 (match_operand:VQ_HSI
2 "register_operand")
5900 (match_operand:VQ_HSI
3 "register_operand")]
5903 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5904 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[
0],
5905 operands[
1], operands[
2],
5912 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
5913 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5915 (match_operand:<VWIDE>
1 "register_operand" "
0")
5918 (sign_extend:<VWIDE>
5920 (match_operand:VQ_HSI
2 "register_operand" "w")
5921 (match_operand:VQ_HSI
5 "vect_par_cnst_hi_half" "")))
5922 (vec_duplicate:<VWIDE>
5923 (sign_extend:<VWIDE_S>
5925 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5926 (parallel [(match_operand:SI
4 "immediate_operand" "i")])
5931 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5933 "sqdmlsl2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5935 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5938 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5939 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5943 (sign_extend:<VWIDE>
5945 (match_operand:VQ_HSI
2 "register_operand" "w")
5946 (match_operand:VQ_HSI
5 "vect_par_cnst_hi_half" "")))
5947 (vec_duplicate:<VWIDE>
5948 (sign_extend:<VWIDE_S>
5950 (match_operand:<VCOND>
3 "register_operand" "<vwx>")
5951 (parallel [(match_operand:SI
4 "immediate_operand" "i")])
5954 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
5957 operands[
4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
4]));
5959 "sqdmlal2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5961 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5964 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
5965 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5967 (match_operand:<VWIDE>
1 "register_operand" "
0")
5970 (sign_extend:<VWIDE>
5972 (match_operand:VQ_HSI
2 "register_operand" "w")
5973 (match_operand:VQ_HSI
5 "vect_par_cnst_hi_half" "")))
5974 (vec_duplicate:<VWIDE>
5975 (sign_extend:<VWIDE_S>
5977 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
5978 (parallel [(match_operand:SI
4 "immediate_operand" "i")])
5983 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
5985 "sqdmlsl2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
5987 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5990 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5991 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
5995 (sign_extend:<VWIDE>
5997 (match_operand:VQ_HSI
2 "register_operand" "w")
5998 (match_operand:VQ_HSI
5 "vect_par_cnst_hi_half" "")))
5999 (vec_duplicate:<VWIDE>
6000 (sign_extend:<VWIDE_S>
6002 (match_operand:<VCONQ>
3 "register_operand" "<vwx>")
6003 (parallel [(match_operand:SI
4 "immediate_operand" "i")])
6006 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
6009 operands[
4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
4]));
6011 "sqdmlal2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[%
4]";
6013 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6016 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
6017 [(match_operand:<VWIDE>
0 "register_operand")
6019 (match_operand:<VWIDE>
1 "register_operand")
6021 (match_operand:VQ_HSI
2 "register_operand")
6022 (match_operand:<VCOND>
3 "register_operand")
6023 (match_operand:SI
4 "immediate_operand")]
6026 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6027 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[
0],
6028 operands[
1], operands[
2],
6029 operands[
3], operands[
4], p));
6033 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
6034 [(match_operand:<VWIDE>
0 "register_operand")
6036 (match_operand:<VWIDE>
1 "register_operand")
6038 (match_operand:VQ_HSI
2 "register_operand")
6039 (match_operand:<VCONQ>
3 "register_operand")
6040 (match_operand:SI
4 "immediate_operand")]
6043 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6044 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[
0],
6045 operands[
1], operands[
2],
6046 operands[
3], operands[
4], p));
6050 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
6051 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6053 (match_operand:<VWIDE>
1 "register_operand" "
0")
6056 (sign_extend:<VWIDE>
6058 (match_operand:VQ_HSI
2 "register_operand" "w")
6059 (match_operand:VQ_HSI
4 "vect_par_cnst_hi_half" "")))
6060 (vec_duplicate:<VWIDE>
6061 (sign_extend:<VWIDE_S>
6062 (match_operand:<VEL>
3 "register_operand" "<vwx>"))))
6065 "sqdmlsl2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[
0]"
6066 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6069 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
6070 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6074 (sign_extend:<VWIDE>
6076 (match_operand:VQ_HSI
2 "register_operand" "w")
6077 (match_operand:VQ_HSI
4 "vect_par_cnst_hi_half" "")))
6078 (vec_duplicate:<VWIDE>
6079 (sign_extend:<VWIDE_S>
6080 (match_operand:<VEL>
3 "register_operand" "<vwx>"))))
6082 (match_operand:<VWIDE>
1 "register_operand" "
0")))]
6084 "sqdmlal2
\\t%<vw2>
0<Vmwtype>, %<v>
2<Vmtype>, %
3.<Vetype>[
0]"
6085 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6088 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
6089 [(match_operand:<VWIDE>
0 "register_operand")
6091 (match_operand:<VWIDE>
1 "register_operand")
6093 (match_operand:VQ_HSI
2 "register_operand")
6094 (match_operand:<VEL>
3 "register_operand")]
6097 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6098 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[
0],
6099 operands[
1], operands[
2],
6106 (define_insn "aarch64_sqdmull<mode>"
6107 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6110 (sign_extend:<VWIDE>
6111 (match_operand:VSD_HSI
1 "register_operand" "w"))
6112 (sign_extend:<VWIDE>
6113 (match_operand:VSD_HSI
2 "register_operand" "w")))
6116 "sqdmull
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
6117 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
6122 (define_insn "aarch64_sqdmull_lane<mode>"
6123 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6126 (sign_extend:<VWIDE>
6127 (match_operand:VD_HSI
1 "register_operand" "w"))
6128 (vec_duplicate:<VWIDE>
6129 (sign_extend:<VWIDE_S>
6131 (match_operand:<VCOND>
2 "register_operand" "<vwx>")
6132 (parallel [(match_operand:SI
3 "immediate_operand" "i")])))
6137 operands[
3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
3]));
6138 return "sqdmull
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
6140 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6143 (define_insn "aarch64_sqdmull_laneq<mode>"
6144 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6147 (sign_extend:<VWIDE>
6148 (match_operand:VD_HSI
1 "register_operand" "w"))
6149 (vec_duplicate:<VWIDE>
6150 (sign_extend:<VWIDE_S>
6152 (match_operand:<VCONQ>
2 "register_operand" "<vwx>")
6153 (parallel [(match_operand:SI
3 "immediate_operand" "i")])))
6158 operands[
3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
3]));
6159 return "sqdmull
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
6161 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6164 (define_insn "aarch64_sqdmull_lane<mode>"
6165 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6168 (sign_extend:<VWIDE>
6169 (match_operand:SD_HSI
1 "register_operand" "w"))
6170 (sign_extend:<VWIDE>
6172 (match_operand:<VCOND>
2 "register_operand" "<vwx>")
6173 (parallel [(match_operand:SI
3 "immediate_operand" "i")]))
6178 operands[
3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
3]));
6179 return "sqdmull
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
6181 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6184 (define_insn "aarch64_sqdmull_laneq<mode>"
6185 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6188 (sign_extend:<VWIDE>
6189 (match_operand:SD_HSI
1 "register_operand" "w"))
6190 (sign_extend:<VWIDE>
6192 (match_operand:<VCONQ>
2 "register_operand" "<vwx>")
6193 (parallel [(match_operand:SI
3 "immediate_operand" "i")]))
6198 operands[
3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
3]));
6199 return "sqdmull
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
6201 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6206 (define_insn "aarch64_sqdmull_n<mode>"
6207 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6210 (sign_extend:<VWIDE>
6211 (match_operand:VD_HSI
1 "register_operand" "w"))
6212 (vec_duplicate:<VWIDE>
6213 (sign_extend:<VWIDE_S>
6214 (match_operand:<VEL>
2 "register_operand" "<vwx>")))
6218 "sqdmull
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[
0]"
6219 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6224 (define_insn "aarch64_sqdmull2<mode>_internal"
6225 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6228 (sign_extend:<VWIDE>
6230 (match_operand:VQ_HSI
1 "register_operand" "w")
6231 (match_operand:VQ_HSI
3 "vect_par_cnst_hi_half" "")))
6232 (sign_extend:<VWIDE>
6234 (match_operand:VQ_HSI
2 "register_operand" "w")
6239 "sqdmull2
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
6240 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6243 (define_expand "aarch64_sqdmull2<mode>"
6244 [(match_operand:<VWIDE>
0 "register_operand")
6245 (match_operand:VQ_HSI
1 "register_operand")
6246 (match_operand:VQ_HSI
2 "register_operand")]
6249 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6250 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[
0], operands[
1],
6257 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6258 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6261 (sign_extend:<VWIDE>
6263 (match_operand:VQ_HSI
1 "register_operand" "w")
6264 (match_operand:VQ_HSI
4 "vect_par_cnst_hi_half" "")))
6265 (vec_duplicate:<VWIDE>
6266 (sign_extend:<VWIDE_S>
6268 (match_operand:<VCOND>
2 "register_operand" "<vwx>")
6269 (parallel [(match_operand:SI
3 "immediate_operand" "i")])))
6274 operands[
3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[
3]));
6275 return "sqdmull2
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
6277 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6280 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6281 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6284 (sign_extend:<VWIDE>
6286 (match_operand:VQ_HSI
1 "register_operand" "w")
6287 (match_operand:VQ_HSI
4 "vect_par_cnst_hi_half" "")))
6288 (vec_duplicate:<VWIDE>
6289 (sign_extend:<VWIDE_S>
6291 (match_operand:<VCONQ>
2 "register_operand" "<vwx>")
6292 (parallel [(match_operand:SI
3 "immediate_operand" "i")])))
6297 operands[
3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[
3]));
6298 return "sqdmull2
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[%
3]";
6300 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6303 (define_expand "aarch64_sqdmull2_lane<mode>"
6304 [(match_operand:<VWIDE>
0 "register_operand")
6305 (match_operand:VQ_HSI
1 "register_operand")
6306 (match_operand:<VCOND>
2 "register_operand")
6307 (match_operand:SI
3 "immediate_operand")]
6310 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6311 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[
0], operands[
1],
6312 operands[
2], operands[
3],
6317 (define_expand "aarch64_sqdmull2_laneq<mode>"
6318 [(match_operand:<VWIDE>
0 "register_operand")
6319 (match_operand:VQ_HSI
1 "register_operand")
6320 (match_operand:<VCONQ>
2 "register_operand")
6321 (match_operand:SI
3 "immediate_operand")]
6324 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6325 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[
0], operands[
1],
6326 operands[
2], operands[
3],
6333 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6334 [(set (match_operand:<VWIDE>
0 "register_operand" "=w")
6337 (sign_extend:<VWIDE>
6339 (match_operand:VQ_HSI
1 "register_operand" "w")
6340 (match_operand:VQ_HSI
3 "vect_par_cnst_hi_half" "")))
6341 (vec_duplicate:<VWIDE>
6342 (sign_extend:<VWIDE_S>
6343 (match_operand:<VEL>
2 "register_operand" "<vwx>")))
6347 "sqdmull2
\\t%<vw2>
0<Vmwtype>, %<v>
1<Vmtype>, %
2.<Vetype>[
0]"
6348 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6351 (define_expand "aarch64_sqdmull2_n<mode>"
6352 [(match_operand:<VWIDE>
0 "register_operand")
6353 (match_operand:VQ_HSI
1 "register_operand")
6354 (match_operand:<VEL>
2 "register_operand")]
6357 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6358 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[
0], operands[
1],
6365 (define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
6366 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
6368 [(match_operand:VSDQ_I_DI
1 "register_operand" "w")
6369 (match_operand:VSDQ_I_DI
2 "register_operand" "w")]
6372 "<sur>shl
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>";
6373 [(set_attr "type" "neon_shift_reg<q>")]
6379 (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
6380 [(set (match_operand:VSDQ_I
0 "register_operand" "=w")
6382 [(match_operand:VSDQ_I
1 "register_operand" "w")
6383 (match_operand:VSDQ_I
2 "register_operand" "w")]
6386 "<sur>q<r>shl
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>";
6387 [(set_attr "type" "neon_sat_shift_reg<q>")]
6392 (define_insn "aarch64_<su>shll<mode>"
6393 [(set (match_operand:<VWIDE>
0 "register_operand")
6394 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6395 (match_operand:VD_BHSI
1 "register_operand"))
6396 (match_operand:<VWIDE>
2
6397 "aarch64_simd_shll_imm_vec")))]
6400 [w, w, D2] shll
\t%
0.<Vwtype>, %
1.<Vtype>, %I2
6401 [w, w, DL] <su>shll
\t%
0.<Vwtype>, %
1.<Vtype>, %I2
6403 [(set_attr "type" "neon_shift_imm_long")]
6406 (define_expand "aarch64_<sur>shll_n<mode>"
6407 [(set (match_operand:<VWIDE>
0 "register_operand")
6408 (unspec:<VWIDE> [(match_operand:VD_BHSI
1 "register_operand")
6410 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6414 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[
2]);
6415 emit_insn (gen_aarch64_<sur>shll<mode> (operands[
0], operands[
1], shft));
6422 (define_insn "aarch64_<su>shll2<mode>"
6423 [(set (match_operand:<VWIDE>
0 "register_operand")
6424 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6426 (match_operand:VQW
1 "register_operand")
6427 (match_operand:VQW
2 "vect_par_cnst_hi_half")))
6428 (match_operand:<VWIDE>
3
6429 "aarch64_simd_shll_imm_vec")))]
6431 {@ [cons: =
0,
1,
2,
3]
6432 [w, w, , D2] shll2
\t%
0.<Vwtype>, %
1.<Vtype>, %I3
6433 [w, w, , DL] <su>shll2
\t%
0.<Vwtype>, %
1.<Vtype>, %I3
6435 [(set_attr "type" "neon_shift_imm_long")]
6438 (define_expand "aarch64_<sur>shll2_n<mode>"
6439 [(set (match_operand:<VWIDE>
0 "register_operand")
6440 (unspec:<VWIDE> [(match_operand:VQW
1 "register_operand")
6442 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6446 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[
2]);
6447 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6448 emit_insn (gen_aarch64_<sur>shll2<mode> (operands[
0], operands[
1], p, shft));
6455 (define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
6456 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
6460 (<SHIFTEXTEND>:<V2XWIDE>
6461 (match_operand:VSDQ_I_DI
1 "register_operand" "w"))
6462 (match_operand:<V2XWIDE>
3 "aarch64_int_rnd_operand"))
6463 (match_operand:VSDQ_I_DI
2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
6465 && aarch64_const_vec_rnd_cst_p (operands[
3], operands[
2])"
6466 "<sra_op>rshr
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %
2"
6467 [(set_attr "type" "neon_sat_shift_imm<q>")]
6470 (define_expand "aarch64_<sra_op>rshr_n<mode>"
6471 [(match_operand:VSDQ_I_DI
0 "register_operand")
6473 (match_operand:VSDQ_I_DI
1 "register_operand")
6474 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
6477 /* Use this expander to create the rounding constant vector, which is
6478 1 << (shift -
1). Use wide_int here to ensure that the right TImode
6479 RTL is generated when handling the DImode expanders. */
6480 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6481 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
2]) -
1, prec);
6482 rtx shft = gen_int_mode (INTVAL (operands[
2]), DImode);
6483 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6484 if (VECTOR_MODE_P (<MODE>mode))
6486 shft = gen_const_vec_duplicate (<MODE>mode, shft);
6487 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6490 emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[
0], operands[
1],
6498 (define_insn "aarch64_<sur>sra_ndi"
6499 [(set (match_operand:DI
0 "register_operand" "=w")
6500 (unspec:DI [(match_operand:DI
1 "register_operand" "
0")
6501 (match_operand:DI
2 "register_operand" "w")
6503 "aarch64_simd_shift_imm_offset_di" "i")]
6506 "<sur>sra
\\t%d0, %d2, %
3"
6507 [(set_attr "type" "neon_shift_acc")]
6512 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6513 [(set (match_operand:VSDQ_I_DI
0 "register_operand" "=w")
6514 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI
1 "register_operand" "
0")
6515 (match_operand:VSDQ_I_DI
2 "register_operand" "w")
6517 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6520 "s<lr>i
\\t%<v>
0<Vmtype>, %<v>
2<Vmtype>, %
3"
6521 [(set_attr "type" "neon_shift_imm<q>")]
6526 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6527 [(set (match_operand:VSDQ_I
0 "register_operand" "=w")
6528 (unspec:VSDQ_I [(match_operand:VSDQ_I
1 "register_operand" "w")
6530 "aarch64_simd_shift_imm_<ve_mode>" "i")]
6533 "<sur>qshl<u>
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %
2"
6534 [(set_attr "type" "neon_sat_shift_imm<q>")]
6540 (define_insn "aarch64_<shrn_op>shrn_n<mode>"
6541 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
6542 (SAT_TRUNC:<VNARROWQ>
6543 (<TRUNC_SHIFT>:SD_HSDI
6544 (match_operand:SD_HSDI
1 "register_operand" "w")
6545 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6547 "<shrn_op>shrn
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6548 [(set_attr "type" "neon_shift_imm_narrow_q")]
6551 (define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
6552 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
6553 (ALL_TRUNC:<VNARROWQ>
6555 (match_operand:VQN
1 "register_operand" "w")
6556 (match_operand:VQN
2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6557 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6558 "<shrn_op>shrn
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6559 [(set_attr "type" "neon_shift_imm_narrow_q")]
6562 (define_expand "aarch64_<shrn_op>shrn_n<mode>"
6563 [(set (match_operand:<VNARROWQ>
0 "register_operand")
6564 (ALL_TRUNC:<VNARROWQ>
6566 (match_operand:VQN
1 "register_operand")
6567 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6570 operands[
2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6571 INTVAL (operands[
2]));
6575 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
6576 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
6577 (ALL_TRUNC:<VNARROWQ>
6578 (<TRUNC_SHIFT>:<V2XWIDE>
6580 (<TRUNCEXTEND>:<V2XWIDE>
6581 (match_operand:VQN
1 "register_operand" "w"))
6582 (match_operand:<V2XWIDE>
3 "aarch64_int_rnd_operand"))
6583 (match_operand:VQN
2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6585 && aarch64_const_vec_rnd_cst_p (operands[
3], operands[
2])"
6586 "<shrn_op>rshrn
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6587 [(set_attr "type" "neon_shift_imm_narrow_q")]
6590 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
6591 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
6592 (SAT_TRUNC:<VNARROWQ>
6593 (<TRUNC_SHIFT>:<DWI>
6595 (<TRUNCEXTEND>:<DWI>
6596 (match_operand:SD_HSDI
1 "register_operand" "w"))
6597 (match_operand:<DWI>
3 "aarch64_int_rnd_operand"))
6598 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6600 && aarch64_const_vec_rnd_cst_p (operands[
3], operands[
2])"
6601 "<shrn_op>rshrn
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6602 [(set_attr "type" "neon_shift_imm_narrow_q")]
6605 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6606 [(set (match_operand:<VNARROWQ>
0 "register_operand")
6607 (SAT_TRUNC:<VNARROWQ>
6608 (<TRUNC_SHIFT>:<V2XWIDE>
6610 (<TRUNCEXTEND>:<V2XWIDE>
6611 (match_operand:SD_HSDI
1 "register_operand"))
6613 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6616 /* Use this expander to create the rounding constant vector, which is
6617 1 << (shift -
1). Use wide_int here to ensure that the right TImode
6618 RTL is generated when handling the DImode expanders. */
6619 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6620 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
2]) -
1, prec);
6621 operands[
3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6625 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6626 [(set (match_operand:<VNARROWQ>
0 "register_operand")
6627 (ALL_TRUNC:<VNARROWQ>
6628 (<TRUNC_SHIFT>:<V2XWIDE>
6630 (<TRUNCEXTEND>:<V2XWIDE>
6631 (match_operand:VQN
1 "register_operand"))
6633 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6636 if (<CODE> == TRUNCATE
6637 && INTVAL (operands[
2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6639 rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
6640 emit_insn (gen_aarch64_raddhn<mode> (operands[
0], operands[
1], tmp0));
6643 /* Use this expander to create the rounding constant vector, which is
6644 1 << (shift -
1). Use wide_int here to ensure that the right TImode
6645 RTL is generated when handling the DImode expanders. */
6646 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6647 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
2]) -
1, prec);
6648 operands[
3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6649 operands[
3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[
3]);
6650 operands[
2] = gen_const_vec_duplicate (<MODE>mode, operands[
2]);
6654 (define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
6655 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
6656 (truncate:<VNARROWQ>
6660 (match_operand:VQN
1 "register_operand" "w")
6661 (match_operand:VQN
2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6662 (match_operand:VQN
3 "aarch64_simd_imm_zero"))
6663 (match_operand:VQN
4 "aarch64_simd_umax_half_mode"))))]
6665 "sqshrun
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6666 [(set_attr "type" "neon_shift_imm_narrow_q")]
6669 (define_insn "aarch64_sqshrun_n<mode>_insn"
6670 [(set (match_operand:SD_HSDI
0 "register_operand" "=w")
6674 (match_operand:SD_HSDI
1 "register_operand" "w")
6675 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6677 (const_int <half_mask>)))]
6679 "sqshrun
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6680 [(set_attr "type" "neon_shift_imm_narrow_q")]
6683 (define_expand "aarch64_sqshrun_n<mode>"
6684 [(match_operand:<VNARROWQ>
0 "register_operand")
6685 (match_operand:SD_HSDI
1 "register_operand")
6686 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6689 rtx dst = gen_reg_rtx (<MODE>mode);
6690 emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[
1],
6692 emit_move_insn (operands[
0], gen_lowpart (<VNARROWQ>mode, dst));
6697 (define_expand "aarch64_sqshrun_n<mode>"
6698 [(set (match_operand:<VNARROWQ>
0 "register_operand")
6699 (truncate:<VNARROWQ>
6703 (match_operand:VQN
1 "register_operand")
6704 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6709 operands[
2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6710 INTVAL (operands[
2]));
6711 operands[
3] = CONST0_RTX (<MODE>mode);
6713 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6714 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6718 (define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
6719 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
6720 (truncate:<VNARROWQ>
6725 (sign_extend:<V2XWIDE>
6726 (match_operand:VQN
1 "register_operand" "w"))
6727 (match_operand:<V2XWIDE>
3 "aarch64_int_rnd_operand"))
6728 (match_operand:VQN
2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6729 (match_operand:<V2XWIDE>
4 "aarch64_simd_imm_zero"))
6730 (match_operand:<V2XWIDE>
5 "aarch64_simd_umax_quarter_mode"))))]
6732 && aarch64_const_vec_rnd_cst_p (operands[
3], operands[
2])"
6733 "sqrshrun
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6734 [(set_attr "type" "neon_shift_imm_narrow_q")]
6737 (define_insn "aarch64_sqrshrun_n<mode>_insn"
6738 [(set (match_operand:<DWI>
0 "register_operand" "=w")
6744 (match_operand:SD_HSDI
1 "register_operand" "w"))
6745 (match_operand:<DWI>
3 "aarch64_int_rnd_operand"))
6746 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6748 (const_int <half_mask>)))]
6750 && aarch64_const_vec_rnd_cst_p (operands[
3], operands[
2])"
6751 "sqrshrun
\t%<vn2>
0<Vmntype>, %<v>
1<Vmtype>, %
2"
6752 [(set_attr "type" "neon_shift_imm_narrow_q")]
6755 (define_expand "aarch64_sqrshrun_n<mode>"
6756 [(match_operand:<VNARROWQ>
0 "register_operand")
6757 (match_operand:SD_HSDI
1 "register_operand")
6758 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6761 int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
6762 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
2]) -
1, prec);
6763 rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
6764 rtx dst = gen_reg_rtx (<DWI>mode);
6765 emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[
1], operands[
2], rnd));
6766 emit_move_insn (operands[
0], gen_lowpart (<VNARROWQ>mode, dst));
6771 (define_expand "aarch64_sqrshrun_n<mode>"
6772 [(set (match_operand:<VNARROWQ>
0 "register_operand")
6773 (truncate:<VNARROWQ>
6778 (sign_extend:<V2XWIDE>
6779 (match_operand:VQN
1 "register_operand"))
6781 (match_operand:SI
2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6786 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6787 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
2]) -
1, prec);
6788 operands[
3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6789 operands[
3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[
3]);
6790 operands[
2] = gen_const_vec_duplicate (<MODE>mode, operands[
2]);
6791 operands[
4] = CONST0_RTX (<V2XWIDE>mode);
6793 = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
6794 operands[
5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[
5]);
6798 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
6799 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
6800 (vec_concat:<VNARROWQ2>
6801 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
6802 (ALL_TRUNC:<VNARROWQ>
6804 (match_operand:VQN
2 "register_operand" "w")
6805 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6806 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6807 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6808 "<shrn_op>shrn2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
6809 [(set_attr "type" "neon_shift_imm_narrow_q")]
6812 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
6813 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
6814 (vec_concat:<VNARROWQ2>
6815 (ALL_TRUNC:<VNARROWQ>
6817 (match_operand:VQN
2 "register_operand" "w")
6818 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6819 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
6820 "TARGET_SIMD && BYTES_BIG_ENDIAN
6821 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6822 "<shrn_op>shrn2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
6823 [(set_attr "type" "neon_shift_imm_narrow_q")]
6826 (define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
6827 [(match_operand:<VNARROWQ2>
0 "register_operand")
6828 (match_operand:<VNARROWQ>
1 "register_operand")
6829 (ALL_TRUNC:<VNARROWQ>
6830 (SHIFTRT:VQN (match_operand:VQN
2 "register_operand")))
6831 (match_operand:SI
3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6832 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6834 operands[
3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6835 INTVAL (operands[
3]));
6837 if (BYTES_BIG_ENDIAN)
6838 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
6839 operands[
0], operands[
1], operands[
2], operands[
3]));
6841 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
6842 operands[
0], operands[
1], operands[
2], operands[
3]));
6847 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
6848 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
6849 (vec_concat:<VNARROWQ2>
6850 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
6851 (ALL_TRUNC:<VNARROWQ>
6852 (<TRUNC_SHIFT>:<V2XWIDE>
6854 (<TRUNCEXTEND>:<V2XWIDE>
6855 (match_operand:VQN
2 "register_operand" "w"))
6856 (match_operand:<V2XWIDE>
4 "aarch64_int_rnd_operand"))
6857 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6858 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6859 && aarch64_const_vec_rnd_cst_p (operands[
4], operands[
3])"
6860 "<shrn_op>rshrn2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
6861 [(set_attr "type" "neon_shift_imm_narrow_q")]
6864 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
6865 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
6866 (vec_concat:<VNARROWQ2>
6867 (ALL_TRUNC:<VNARROWQ>
6868 (<TRUNC_SHIFT>:<V2XWIDE>
6870 (<TRUNCEXTEND>:<V2XWIDE>
6871 (match_operand:VQN
2 "register_operand" "w"))
6872 (match_operand:<V2XWIDE>
4 "aarch64_int_rnd_operand"))
6873 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6874 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
6875 "TARGET_SIMD && BYTES_BIG_ENDIAN
6876 && aarch64_const_vec_rnd_cst_p (operands[
4], operands[
3])"
6877 "<shrn_op>rshrn2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
6878 [(set_attr "type" "neon_shift_imm_narrow_q")]
6881 (define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
6882 [(match_operand:<VNARROWQ2>
0 "register_operand")
6883 (match_operand:<VNARROWQ>
1 "register_operand")
6884 (ALL_TRUNC:<VNARROWQ> (match_operand:VQN
2 "register_operand"))
6885 (match_operand:SI
3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6888 if (<CODE> == TRUNCATE
6889 && INTVAL (operands[
3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6891 rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
6892 emit_insn (gen_aarch64_raddhn2<mode> (operands[
0], operands[
1],
6896 /* Use this expander to create the rounding constant vector, which is
6897 1 << (shift -
1). Use wide_int here to ensure that the right TImode
6898 RTL is generated when handling the DImode expanders. */
6899 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6900 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
3]) -
1, prec);
6901 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6902 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6903 operands[
3] = gen_const_vec_duplicate (<MODE>mode, operands[
3]);
6904 if (BYTES_BIG_ENDIAN)
6905 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[
0],
6911 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[
0],
6920 (define_insn "aarch64_sqshrun2_n<mode>_insn_le"
6921 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
6922 (vec_concat:<VNARROWQ2>
6923 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
6924 (truncate:<VNARROWQ>
6928 (match_operand:VQN
2 "register_operand" "w")
6929 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6930 (match_operand:VQN
4 "aarch64_simd_imm_zero"))
6931 (match_operand:VQN
5 "aarch64_simd_umax_half_mode")))))]
6932 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6933 "sqshrun2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
6934 [(set_attr "type" "neon_shift_imm_narrow_q")]
6937 (define_insn "aarch64_sqshrun2_n<mode>_insn_be"
6938 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
6939 (vec_concat:<VNARROWQ2>
6940 (truncate:<VNARROWQ>
6944 (match_operand:VQN
2 "register_operand" "w")
6945 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6946 (match_operand:VQN
4 "aarch64_simd_imm_zero"))
6947 (match_operand:VQN
5 "aarch64_simd_umax_half_mode")))
6948 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
6949 "TARGET_SIMD && BYTES_BIG_ENDIAN"
6950 "sqshrun2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
6951 [(set_attr "type" "neon_shift_imm_narrow_q")]
6954 (define_expand "aarch64_sqshrun2_n<mode>"
6955 [(match_operand:<VNARROWQ2>
0 "register_operand")
6956 (match_operand:<VNARROWQ>
1 "register_operand")
6957 (match_operand:VQN
2 "register_operand")
6958 (match_operand:SI
3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6961 operands[
3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6962 INTVAL (operands[
3]));
6963 rtx zeros = CONST0_RTX (<MODE>mode);
6965 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6966 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6967 if (BYTES_BIG_ENDIAN)
6968 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[
0],
6969 operands[
1], operands[
2], operands[
3],
6972 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[
0],
6973 operands[
1], operands[
2], operands[
3],
6979 (define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
6980 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
6981 (vec_concat:<VNARROWQ2>
6982 (match_operand:<VNARROWQ>
1 "register_operand" "
0")
6983 (truncate:<VNARROWQ>
6988 (sign_extend:<V2XWIDE>
6989 (match_operand:VQN
2 "register_operand" "w"))
6990 (match_operand:<V2XWIDE>
4 "aarch64_int_rnd_operand"))
6991 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6992 (match_operand:<V2XWIDE>
5 "aarch64_simd_imm_zero"))
6993 (match_operand:<V2XWIDE>
6 "aarch64_simd_umax_quarter_mode")))))]
6994 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6995 && aarch64_const_vec_rnd_cst_p (operands[
4], operands[
3])"
6996 "sqrshrun2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
6997 [(set_attr "type" "neon_shift_imm_narrow_q")]
7000 (define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
7001 [(set (match_operand:<VNARROWQ2>
0 "register_operand" "=w")
7002 (vec_concat:<VNARROWQ2>
7003 (truncate:<VNARROWQ>
7008 (sign_extend:<V2XWIDE>
7009 (match_operand:VQN
2 "register_operand" "w"))
7010 (match_operand:<V2XWIDE>
4 "aarch64_int_rnd_operand"))
7011 (match_operand:VQN
3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7012 (match_operand:<V2XWIDE>
5 "aarch64_simd_imm_zero"))
7013 (match_operand:<V2XWIDE>
6 "aarch64_simd_umax_quarter_mode")))
7014 (match_operand:<VNARROWQ>
1 "register_operand" "
0")))]
7015 "TARGET_SIMD && BYTES_BIG_ENDIAN
7016 && aarch64_const_vec_rnd_cst_p (operands[
4], operands[
3])"
7017 "sqrshrun2
\t%<vn2>
0.<V2ntype>, %<v>
2.<Vtype>, %
3"
7018 [(set_attr "type" "neon_shift_imm_narrow_q")]
7021 (define_expand "aarch64_sqrshrun2_n<mode>"
7022 [(match_operand:<VNARROWQ2>
0 "register_operand")
7023 (match_operand:<VNARROWQ>
1 "register_operand")
7024 (match_operand:VQN
2 "register_operand")
7025 (match_operand:SI
3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7028 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7029 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[
3]) -
1, prec);
7030 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7031 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7032 rtx zero = CONST0_RTX (<V2XWIDE>mode);
7034 = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
7035 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7036 operands[
3] = gen_const_vec_duplicate (<MODE>mode, operands[
3]);
7037 if (BYTES_BIG_ENDIAN)
7038 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[
0],
7039 operands[
1], operands[
2], operands[
3], rnd,
7042 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[
0],
7043 operands[
1], operands[
2], operands[
3], rnd,
7049 ;; cm(eq|ge|gt|lt|le)
7050 ;; Note, we have constraints for Dz and Z as different expanders
7051 ;; have different ideas of what should be passed to this pattern.
7053 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7054 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w,w")
7056 (COMPARISONS:<V_INT_EQUIV>
7057 (match_operand:VDQ_I
1 "register_operand" "w,w")
7058 (match_operand:VDQ_I
2 "aarch64_simd_reg_or_zero" "w,ZDz")
7062 cm<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7063 cm<optab>
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, #
0"
7064 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
7067 (define_insn_and_split "aarch64_cm<optab>di"
7068 [(set (match_operand:DI
0 "register_operand" "=w,w,r")
7071 (match_operand:DI
1 "register_operand" "w,w,r")
7072 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
7074 (clobber (reg:CC CC_REGNUM))]
7077 "&& reload_completed"
7078 [(set (match_operand:DI
0 "register_operand")
7081 (match_operand:DI
1 "register_operand")
7082 (match_operand:DI
2 "aarch64_simd_reg_or_zero")
7085 /* If we are in the general purpose register file,
7086 we split to a sequence of comparison and store. */
7087 if (GP_REGNUM_P (REGNO (operands[
0]))
7088 && GP_REGNUM_P (REGNO (operands[
1])))
7090 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[
1], operands[
2]);
7091 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[
1], operands[
2]);
7092 rtx comparison = gen_rtx_<CMP> (mode, operands[
1], operands[
2]);
7093 emit_insn (gen_cstoredi_neg (operands[
0], comparison, cc_reg));
7096 /* Otherwise, we expand to a similar pattern which does not
7097 clobber CC_REGNUM. */
7099 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
7102 (define_insn "*aarch64_cm<optab>di"
7103 [(set (match_operand:DI
0 "register_operand" "=w,w")
7106 (match_operand:DI
1 "register_operand" "w,w")
7107 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w,ZDz")
7109 "TARGET_SIMD && reload_completed"
7111 cm<n_optab>
\t%d0, %d<cmp_1>, %d<cmp_2>
7112 cm<optab>
\t%d0, %d1, #
0"
7113 [(set_attr "type" "neon_compare, neon_compare_zero")]
7118 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7119 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w")
7121 (UCOMPARISONS:<V_INT_EQUIV>
7122 (match_operand:VDQ_I
1 "register_operand" "w")
7123 (match_operand:VDQ_I
2 "register_operand" "w")
7126 "cm<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7127 [(set_attr "type" "neon_compare<q>")]
7130 (define_insn_and_split "aarch64_cm<optab>di"
7131 [(set (match_operand:DI
0 "register_operand" "=w,r")
7134 (match_operand:DI
1 "register_operand" "w,r")
7135 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w,r")
7137 (clobber (reg:CC CC_REGNUM))]
7140 "&& reload_completed"
7141 [(set (match_operand:DI
0 "register_operand")
7144 (match_operand:DI
1 "register_operand")
7145 (match_operand:DI
2 "aarch64_simd_reg_or_zero")
7148 /* If we are in the general purpose register file,
7149 we split to a sequence of comparison and store. */
7150 if (GP_REGNUM_P (REGNO (operands[
0]))
7151 && GP_REGNUM_P (REGNO (operands[
1])))
7153 machine_mode mode = CCmode;
7154 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[
1], operands[
2]);
7155 rtx comparison = gen_rtx_<CMP> (mode, operands[
1], operands[
2]);
7156 emit_insn (gen_cstoredi_neg (operands[
0], comparison, cc_reg));
7159 /* Otherwise, we expand to a similar pattern which does not
7160 clobber CC_REGNUM. */
7162 [(set_attr "type" "neon_compare,multiple")]
7165 (define_insn "*aarch64_cm<optab>di"
7166 [(set (match_operand:DI
0 "register_operand" "=w")
7169 (match_operand:DI
1 "register_operand" "w")
7170 (match_operand:DI
2 "aarch64_simd_reg_or_zero" "w")
7172 "TARGET_SIMD && reload_completed"
7173 "cm<n_optab>
\t%d0, %d<cmp_1>, %d<cmp_2>"
7174 [(set_attr "type" "neon_compare")]
7179 ;; Although neg (ne (and x y)
0) is the natural way of expressing a cmtst,
7180 ;; we don't have any insns using ne, and aarch64_vcond outputs
7181 ;; not (neg (eq (and x y)
0))
7182 ;; which is rewritten by simplify_rtx as
7183 ;; plus (eq (and x y)
0) -
1.
7185 (define_insn "aarch64_cmtst<mode><vczle><vczbe>"
7186 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w")
7190 (match_operand:VDQ_I
1 "register_operand" "w")
7191 (match_operand:VDQ_I
2 "register_operand" "w"))
7192 (match_operand:VDQ_I
3 "aarch64_simd_imm_zero"))
7193 (match_operand:<V_INT_EQUIV>
4 "aarch64_simd_imm_minus_one")))
7196 "cmtst
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
7197 [(set_attr "type" "neon_tst<q>")]
7200 ;; One can also get a cmtsts by having to combine a
7201 ;; not (neq (eq x
0)) in which case you rewrite it to
7202 ;; a comparison against itself
7204 (define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
7205 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w")
7208 (match_operand:VDQ_I
1 "register_operand" "w")
7209 (match_operand:VDQ_I
2 "aarch64_simd_imm_zero"))
7210 (match_operand:<V_INT_EQUIV>
3 "aarch64_simd_imm_minus_one")))
7213 "cmtst
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
1<Vmtype>"
7214 [(set_attr "type" "neon_tst<q>")]
7217 (define_insn_and_split "aarch64_cmtstdi"
7218 [(set (match_operand:DI
0 "register_operand" "=w,r")
7222 (match_operand:DI
1 "register_operand" "w,r")
7223 (match_operand:DI
2 "register_operand" "w,r"))
7225 (clobber (reg:CC CC_REGNUM))]
7228 "&& reload_completed"
7229 [(set (match_operand:DI
0 "register_operand")
7233 (match_operand:DI
1 "register_operand")
7234 (match_operand:DI
2 "register_operand"))
7237 /* If we are in the general purpose register file,
7238 we split to a sequence of comparison and store. */
7239 if (GP_REGNUM_P (REGNO (operands[
0]))
7240 && GP_REGNUM_P (REGNO (operands[
1])))
7242 rtx and_tree = gen_rtx_AND (DImode, operands[
1], operands[
2]);
7243 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
7244 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
7245 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
7246 emit_insn (gen_cstoredi_neg (operands[
0], comparison, cc_reg));
7249 /* Otherwise, we expand to a similar pattern which does not
7250 clobber CC_REGNUM. */
7252 [(set_attr "type" "neon_tst,multiple")]
7255 (define_insn "*aarch64_cmtstdi<vczle><vczbe>"
7256 [(set (match_operand:DI
0 "register_operand" "=w")
7260 (match_operand:DI
1 "register_operand" "w")
7261 (match_operand:DI
2 "register_operand" "w"))
7264 "cmtst
\t%d0, %d1, %d2"
7265 [(set_attr "type" "neon_tst")]
7268 ;; fcm(eq|ge|gt|le|lt)
7270 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7271 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w,w")
7273 (COMPARISONS:<V_INT_EQUIV>
7274 (match_operand:VHSDF_HSDF
1 "register_operand" "w,w")
7275 (match_operand:VHSDF_HSDF
2 "aarch64_simd_reg_or_zero" "w,YDz")
7279 fcm<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7280 fcm<optab>
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>,
0"
7281 [(set_attr "type" "neon_fp_compare_<stype><q>")]
7285 ;; Note we can also handle what would be fac(le|lt) by
7286 ;; generating fac(ge|gt).
7288 (define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
7289 [(set (match_operand:<V_INT_EQUIV>
0 "register_operand" "=w")
7291 (FAC_COMPARISONS:<V_INT_EQUIV>
7293 (match_operand:VHSDF_HSDF
1 "register_operand" "w"))
7295 (match_operand:VHSDF_HSDF
2 "register_operand" "w"))
7298 "fac<n_optab>
\t%<v>
0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7299 [(set_attr "type" "neon_fp_compare_<stype><q>")]
7304 ;; ADDP with two registers semantically concatenates them and performs
7305 ;; a pairwise addition on the result. For
128-bit input modes represent this
7306 ;; as a concatenation of the pairwise addition results of the two input
7307 ;; registers. This allows us to avoid using intermediate
256-bit modes.
7308 (define_insn "aarch64_addp<mode>_insn"
7309 [(set (match_operand:VQ_I
0 "register_operand" "=w")
7313 (match_operand:VQ_I
1 "register_operand" "w")
7314 (match_operand:VQ_I
3 "vect_par_cnst_even_or_odd_half"))
7317 (match_operand:VQ_I
4 "vect_par_cnst_even_or_odd_half")))
7320 (match_operand:VQ_I
2 "register_operand" "w")
7325 "TARGET_SIMD && !rtx_equal_p (operands[
3], operands[
4])"
7326 "addp
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
7327 [(set_attr "type" "neon_reduc_add<q>")]
7330 ;; For
64-bit input modes an ADDP is represented as a concatenation
7331 ;; of the input registers into a
128-bit register which is then fed
7332 ;; into a pairwise add. That way we avoid having to create intermediate
7333 ;;
32-bit vector modes.
7334 (define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
7335 [(set (match_operand:VD_BHSI
0 "register_operand" "=w")
7339 (match_operand:VD_BHSI
1 "register_operand" "w")
7340 (match_operand:VD_BHSI
2 "register_operand" "w"))
7341 (match_operand:<VDBL>
3 "vect_par_cnst_even_or_odd_half"))
7346 (match_operand:<VDBL>
4 "vect_par_cnst_even_or_odd_half"))))]
7347 "TARGET_SIMD && !rtx_equal_p (operands[
3], operands[
4])"
7348 "addp
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
7349 [(set_attr "type" "neon_reduc_add<q>")]
7352 ;; A common use case of
64-bit ADDP is to have both operands come from the same
7353 ;;
128-bit vector and produce the pairwise addition results in the lower half.
7354 ;; Split into the
128-bit ADDP form and extract the low half.
7355 (define_insn_and_split "*aarch64_addp_same_reg<mode>"
7356 [(set (match_operand:<VHALF>
0 "register_operand" "=w")
7359 (match_operand:VQ_I
1 "register_operand" "w")
7360 (match_operand:VQ_I
2 "vect_par_cnst_even_or_odd_half"))
7363 (match_operand:VQ_I
3 "vect_par_cnst_even_or_odd_half"))))]
7364 "TARGET_SIMD && !rtx_equal_p (operands[
2], operands[
3])"
7370 if (can_create_pseudo_p ())
7371 scratch = gen_reg_rtx (<MODE>mode);
7373 scratch = lowpart_subreg (<MODE>mode, operands[
0], <VHALF>mode);
7375 emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[
1], operands[
1],
7376 operands[
2], operands[
3]));
7377 emit_move_insn (operands[
0], gen_lowpart (<VHALF>mode, scratch));
7382 (define_expand "aarch64_addp<mode>"
7383 [(match_operand:VDQ_I
0 "register_operand")
7384 (match_operand:VDQ_I
1 "register_operand")
7385 (match_operand:VDQ_I
2 "register_operand")]
7388 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
7389 if (known_eq (GET_MODE_BITSIZE (<MODE>mode),
128))
7391 rtx par_even = aarch64_gen_stepped_int_parallel (nunits,
0,
2);
7392 rtx par_odd = aarch64_gen_stepped_int_parallel (nunits,
1,
2);
7393 if (BYTES_BIG_ENDIAN)
7394 std::swap (operands[
1], operands[
2]);
7395 emit_insn (gen_aarch64_addp<mode>_insn (operands[
0], operands[
1],
7396 operands[
2], par_even, par_odd));
7403 (define_expand "sqrt<mode>
2"
7404 [(set (match_operand:VHSDF
0 "register_operand")
7405 (sqrt:VHSDF (match_operand:VHSDF
1 "register_operand")))]
7408 if (aarch64_emit_approx_sqrt (operands[
0], operands[
1], false))
7412 (define_insn "*sqrt<mode>
2<vczle><vczbe>"
7413 [(set (match_operand:VHSDF
0 "register_operand" "=w")
7414 (sqrt:VHSDF (match_operand:VHSDF
1 "register_operand" "w")))]
7416 "fsqrt
\\t%
0.<Vtype>, %
1.<Vtype>"
7417 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
7420 ;; Patterns for vector struct loads and stores.
7422 (define_insn "aarch64_simd_ld2<vstruct_elt>"
7423 [(set (match_operand:VSTRUCT_2Q
0 "register_operand" "=w")
7424 (unspec:VSTRUCT_2Q [
7425 (match_operand:VSTRUCT_2Q
1 "aarch64_simd_struct_operand" "Utv")]
7428 "ld2
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
7429 [(set_attr "type" "neon_load2_2reg<q>")]
7432 (define_insn "aarch64_simd_ld2r<vstruct_elt>"
7433 [(set (match_operand:VSTRUCT_2QD
0 "register_operand" "=w")
7434 (unspec:VSTRUCT_2QD [
7435 (match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")]
7438 "ld2r
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
7439 [(set_attr "type" "neon_load2_all_lanes<q>")]
7442 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7443 [(set (match_operand:VSTRUCT_2QD
0 "register_operand" "=w")
7444 (unspec:VSTRUCT_2QD [
7445 (match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
7446 (match_operand:VSTRUCT_2QD
2 "register_operand" "
0")
7447 (match_operand:SI
3 "immediate_operand" "i")]
7451 operands[
3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7452 INTVAL (operands[
3]));
7453 return "ld2
\\t{%S0.<Vetype> - %T0.<Vetype>}[%
3], %
1";
7455 [(set_attr "type" "neon_load2_one_lane")]
7458 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7459 [(set (match_operand:VSTRUCT_2Q
0 "register_operand")
7460 (unspec:VSTRUCT_2Q [
7461 (match_operand:VSTRUCT_2Q
1 "aarch64_simd_struct_operand")]
7465 if (BYTES_BIG_ENDIAN)
7467 rtx tmp = gen_reg_rtx (<MODE>mode);
7468 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7469 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7470 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[
1]));
7471 emit_insn (gen_aarch64_rev_reglist<mode> (operands[
0], tmp, mask));
7474 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[
0], operands[
1]));
7478 (define_insn "aarch64_simd_st2<vstruct_elt>"
7479 [(set (match_operand:VSTRUCT_2Q
0 "aarch64_simd_struct_operand" "=Utv")
7480 (unspec:VSTRUCT_2Q [
7481 (match_operand:VSTRUCT_2Q
1 "register_operand" "w")]
7484 "st2
\\t{%S1.<Vtype> - %T1.<Vtype>}, %
0"
7485 [(set_attr "type" "neon_store2_2reg<q>")]
7488 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7489 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7490 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
7491 (unspec:BLK [(match_operand:VSTRUCT_2QD
1 "register_operand" "w")
7492 (match_operand:SI
2 "immediate_operand" "i")]
7496 operands[
2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7497 INTVAL (operands[
2]));
7498 return "st2
\\t{%S1.<Vetype> - %T1.<Vetype>}[%
2], %
0";
7500 [(set_attr "type" "neon_store2_one_lane<q>")]
7503 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7504 [(set (match_operand:VSTRUCT_2Q
0 "aarch64_simd_struct_operand")
7505 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q
1 "register_operand")]
7509 if (BYTES_BIG_ENDIAN)
7511 rtx tmp = gen_reg_rtx (<MODE>mode);
7512 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7513 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7514 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[
1], mask));
7515 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[
0], tmp));
7518 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[
0], operands[
1]));
7522 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7523 [(set (match_operand:VSTRUCT_3Q
0 "register_operand" "=w")
7524 (unspec:VSTRUCT_3Q [
7525 (match_operand:VSTRUCT_3Q
1 "aarch64_simd_struct_operand" "Utv")]
7528 "ld3
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
7529 [(set_attr "type" "neon_load3_3reg<q>")]
7532 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
7533 [(set (match_operand:VSTRUCT_3QD
0 "register_operand" "=w")
7534 (unspec:VSTRUCT_3QD [
7535 (match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")]
7538 "ld3r
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
7539 [(set_attr "type" "neon_load3_all_lanes<q>")]
7542 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7543 [(set (match_operand:VSTRUCT_3QD
0 "register_operand" "=w")
7544 (unspec:VSTRUCT_3QD [
7545 (match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
7546 (match_operand:VSTRUCT_3QD
2 "register_operand" "
0")
7547 (match_operand:SI
3 "immediate_operand" "i")]
7551 operands[
3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7552 INTVAL (operands[
3]));
7553 return "ld3
\\t{%S0.<Vetype> - %U0.<Vetype>}[%
3], %
1";
7555 [(set_attr "type" "neon_load3_one_lane")]
7558 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7559 [(set (match_operand:VSTRUCT_3Q
0 "register_operand")
7560 (unspec:VSTRUCT_3Q [
7561 (match_operand:VSTRUCT_3Q
1 "aarch64_simd_struct_operand")]
7565 if (BYTES_BIG_ENDIAN)
7567 rtx tmp = gen_reg_rtx (<MODE>mode);
7568 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7569 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7570 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[
1]));
7571 emit_insn (gen_aarch64_rev_reglist<mode> (operands[
0], tmp, mask));
7574 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[
0], operands[
1]));
7578 (define_insn "aarch64_simd_st3<vstruct_elt>"
7579 [(set (match_operand:VSTRUCT_3Q
0 "aarch64_simd_struct_operand" "=Utv")
7580 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q
1 "register_operand" "w")]
7583 "st3
\\t{%S1.<Vtype> - %U1.<Vtype>}, %
0"
7584 [(set_attr "type" "neon_store3_3reg<q>")]
7587 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7588 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7589 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
7590 (unspec:BLK [(match_operand:VSTRUCT_3QD
1 "register_operand" "w")
7591 (match_operand:SI
2 "immediate_operand" "i")]
7595 operands[
2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7596 INTVAL (operands[
2]));
7597 return "st3
\\t{%S1.<Vetype> - %U1.<Vetype>}[%
2], %
0";
7599 [(set_attr "type" "neon_store3_one_lane<q>")]
7602 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7603 [(set (match_operand:VSTRUCT_3Q
0 "aarch64_simd_struct_operand")
7604 (unspec:VSTRUCT_3Q [
7605 (match_operand:VSTRUCT_3Q
1 "register_operand")]
7609 if (BYTES_BIG_ENDIAN)
7611 rtx tmp = gen_reg_rtx (<MODE>mode);
7612 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7613 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7614 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[
1], mask));
7615 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[
0], tmp));
7618 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[
0], operands[
1]));
7622 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7623 [(set (match_operand:VSTRUCT_4Q
0 "register_operand" "=w")
7624 (unspec:VSTRUCT_4Q [
7625 (match_operand:VSTRUCT_4Q
1 "aarch64_simd_struct_operand" "Utv")]
7628 "ld4
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
7629 [(set_attr "type" "neon_load4_4reg<q>")]
7632 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
7633 [(set (match_operand:VSTRUCT_4QD
0 "register_operand" "=w")
7634 (unspec:VSTRUCT_4QD [
7635 (match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")]
7638 "ld4r
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
7639 [(set_attr "type" "neon_load4_all_lanes<q>")]
7642 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7643 [(set (match_operand:VSTRUCT_4QD
0 "register_operand" "=w")
7644 (unspec:VSTRUCT_4QD [
7645 (match_operand:BLK
1 "aarch64_simd_struct_operand" "Utv")
7646 (match_operand:VSTRUCT_4QD
2 "register_operand" "
0")
7647 (match_operand:SI
3 "immediate_operand" "i")]
7651 operands[
3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7652 INTVAL (operands[
3]));
7653 return "ld4
\\t{%S0.<Vetype> - %V0.<Vetype>}[%
3], %
1";
7655 [(set_attr "type" "neon_load4_one_lane")]
7658 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7659 [(set (match_operand:VSTRUCT_4Q
0 "register_operand")
7660 (unspec:VSTRUCT_4Q [
7661 (match_operand:VSTRUCT_4Q
1 "aarch64_simd_struct_operand")]
7665 if (BYTES_BIG_ENDIAN)
7667 rtx tmp = gen_reg_rtx (<MODE>mode);
7668 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7669 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7670 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[
1]));
7671 emit_insn (gen_aarch64_rev_reglist<mode> (operands[
0], tmp, mask));
7674 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[
0], operands[
1]));
7678 (define_insn "aarch64_simd_st4<vstruct_elt>"
7679 [(set (match_operand:VSTRUCT_4Q
0 "aarch64_simd_struct_operand" "=Utv")
7680 (unspec:VSTRUCT_4Q [
7681 (match_operand:VSTRUCT_4Q
1 "register_operand" "w")]
7684 "st4
\\t{%S1.<Vtype> - %V1.<Vtype>}, %
0"
7685 [(set_attr "type" "neon_store4_4reg<q>")]
7688 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7689 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7690 [(set (match_operand:BLK
0 "aarch64_simd_struct_operand" "=Utv")
7691 (unspec:BLK [(match_operand:VSTRUCT_4QD
1 "register_operand" "w")
7692 (match_operand:SI
2 "immediate_operand" "i")]
7696 operands[
2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7697 INTVAL (operands[
2]));
7698 return "st4
\\t{%S1.<Vetype> - %V1.<Vetype>}[%
2], %
0";
7700 [(set_attr "type" "neon_store4_one_lane<q>")]
7703 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7704 [(set (match_operand:VSTRUCT_4Q
0 "aarch64_simd_struct_operand")
7705 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q
1 "register_operand")]
7709 if (BYTES_BIG_ENDIAN)
7711 rtx tmp = gen_reg_rtx (<MODE>mode);
7712 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7713 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7714 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[
1], mask));
7715 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[
0], tmp));
7718 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[
0], operands[
1]));
7722 (define_insn_and_split "aarch64_rev_reglist<mode>"
7723 [(set (match_operand:VSTRUCT_QD
0 "register_operand" "=&w")
7725 [(match_operand:VSTRUCT_QD
1 "register_operand" "w")
7726 (match_operand:V16QI
2 "register_operand" "w")]
7727 UNSPEC_REV_REGLIST))]
7730 "&& reload_completed"
7734 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7735 for (i =
0; i < nregs; i++)
7737 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[
0]) + i);
7738 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[
1]) + i);
7739 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[
2]));
7743 [(set_attr "type" "neon_tbl1_q")
7744 (set_attr "length" "<insn_count>")]
7747 ;; Reload patterns for AdvSIMD register list operands.
7749 (define_expand "mov<mode>"
7750 [(set (match_operand:VSTRUCT_QD
0 "nonimmediate_operand")
7751 (match_operand:VSTRUCT_QD
1 "general_operand"))]
7754 if (can_create_pseudo_p ())
7756 if (GET_CODE (operands[
0]) != REG)
7757 operands[
1] = force_reg (<MODE>mode, operands[
1]);
7761 (define_expand "mov<mode>"
7762 [(set (match_operand:VSTRUCT
0 "nonimmediate_operand")
7763 (match_operand:VSTRUCT
1 "general_operand"))]
7766 if (can_create_pseudo_p ())
7768 if (GET_CODE (operands[
0]) != REG)
7769 operands[
1] = force_reg (<MODE>mode, operands[
1]);
7773 (define_expand "movv8di"
7774 [(set (match_operand:V8DI
0 "nonimmediate_operand")
7775 (match_operand:V8DI
1 "general_operand"))]
7778 if (can_create_pseudo_p () && MEM_P (operands[
0]))
7779 operands[
1] = force_reg (V8DImode, operands[
1]);
7782 (define_expand "aarch64_ld1x3<vstruct_elt>"
7783 [(match_operand:VSTRUCT_3QD
0 "register_operand")
7784 (match_operand:DI
1 "register_operand")]
7787 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
1]);
7788 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[
0], mem));
7792 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7793 [(set (match_operand:VSTRUCT_3QD
0 "register_operand" "=w")
7795 [(match_operand:VSTRUCT_3QD
1 "aarch64_simd_struct_operand" "Utv")]
7798 "ld1
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
7799 [(set_attr "type" "neon_load1_3reg<q>")]
7802 (define_expand "aarch64_ld1x4<vstruct_elt>"
7803 [(match_operand:VSTRUCT_4QD
0 "register_operand" "=w")
7804 (match_operand:DI
1 "register_operand" "r")]
7807 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
1]);
7808 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[
0], mem));
7812 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7813 [(set (match_operand:VSTRUCT_4QD
0 "register_operand" "=w")
7815 [(match_operand:VSTRUCT_4QD
1 "aarch64_simd_struct_operand" "Utv")]
7818 "ld1
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
7819 [(set_attr "type" "neon_load1_4reg<q>")]
7822 (define_expand "aarch64_st1x2<vstruct_elt>"
7823 [(match_operand:DI
0 "register_operand")
7824 (match_operand:VSTRUCT_2QD
1 "register_operand")]
7827 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
0]);
7828 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[
1]));
7832 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7833 [(set (match_operand:VSTRUCT_2QD
0 "aarch64_simd_struct_operand" "=Utv")
7835 [(match_operand:VSTRUCT_2QD
1 "register_operand" "w")]
7838 "st1
\\t{%S1.<Vtype> - %T1.<Vtype>}, %
0"
7839 [(set_attr "type" "neon_store1_2reg<q>")]
7842 (define_expand "aarch64_st1x3<vstruct_elt>"
7843 [(match_operand:DI
0 "register_operand")
7844 (match_operand:VSTRUCT_3QD
1 "register_operand")]
7847 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
0]);
7848 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[
1]));
7852 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7853 [(set (match_operand:VSTRUCT_3QD
0 "aarch64_simd_struct_operand" "=Utv")
7855 [(match_operand:VSTRUCT_3QD
1 "register_operand" "w")]
7858 "st1
\\t{%S1.<Vtype> - %U1.<Vtype>}, %
0"
7859 [(set_attr "type" "neon_store1_3reg<q>")]
7862 (define_expand "aarch64_st1x4<vstruct_elt>"
7863 [(match_operand:DI
0 "register_operand" "")
7864 (match_operand:VSTRUCT_4QD
1 "register_operand" "")]
7867 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
0]);
7868 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[
1]));
7872 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7873 [(set (match_operand:VSTRUCT_4QD
0 "aarch64_simd_struct_operand" "=Utv")
7875 [(match_operand:VSTRUCT_4QD
1 "register_operand" "w")]
7878 "st1
\\t{%S1.<Vtype> - %V1.<Vtype>}, %
0"
7879 [(set_attr "type" "neon_store1_4reg<q>")]
7882 (define_insn "*aarch64_mov<mode>"
7883 [(set (match_operand:VSTRUCT_QD
0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7884 (match_operand:VSTRUCT_QD
1 "aarch64_simd_general_operand" " w,w,Utv"))]
7885 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7886 && (register_operand (operands[
0], <MODE>mode)
7887 || register_operand (operands[
1], <MODE>mode))"
7890 st1
\\t{%S1.<Vtype> - %<Vendreg>
1.<Vtype>}, %
0
7891 ld1
\\t{%S0.<Vtype> - %<Vendreg>
0.<Vtype>}, %
1"
7892 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7893 neon_load<nregs>_<nregs>reg_q")
7894 (set_attr "length" "<insn_count>,
4,
4")]
7897 (define_insn "*aarch64_mov<mode>"
7898 [(set (match_operand:VSTRUCT
0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7899 (match_operand:VSTRUCT
1 "aarch64_simd_general_operand" " w,w,Utv"))]
7900 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7901 && (register_operand (operands[
0], <MODE>mode)
7902 || register_operand (operands[
1], <MODE>mode))"
7905 st1
\\t{%S1.16b - %<Vendreg>
1.16b}, %
0
7906 ld1
\\t{%S0.16b - %<Vendreg>
0.16b}, %
1"
7907 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7908 neon_load<nregs>_<nregs>reg_q")
7909 (set_attr "length" "<insn_count>,
4,
4")]
7912 (define_insn "*aarch64_movv8di"
7913 [(set (match_operand:V8DI
0 "nonimmediate_operand" "=r,m,r")
7914 (match_operand:V8DI
1 "general_operand" " r,r,m"))]
7915 "(register_operand (operands[
0], V8DImode)
7916 || register_operand (operands[
1], V8DImode))"
7918 [(set_attr "type" "multiple,multiple,multiple")
7919 (set_attr "length" "
32,
16,
16")]
7922 (define_insn "aarch64_be_ld1<mode>"
7923 [(set (match_operand:VALLDI_F16
0 "register_operand" "=w")
7924 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16
1
7925 "aarch64_simd_struct_operand" "Utv")]
7928 "ld1
\\t{%
0<Vmtype>}, %
1"
7929 [(set_attr "type" "neon_load1_1reg<q>")]
7932 (define_insn "aarch64_be_st1<mode>"
7933 [(set (match_operand:VALLDI_F16
0 "aarch64_simd_struct_operand" "=Utv")
7934 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16
1 "register_operand" "w")]
7937 "st1
\\t{%
1<Vmtype>}, %
0"
7938 [(set_attr "type" "neon_store1_1reg<q>")]
7941 (define_insn "*aarch64_be_mov<mode>"
7942 [(set (match_operand:VSTRUCT_2D
0 "nonimmediate_operand" "=w,m,w")
7943 (match_operand:VSTRUCT_2D
1 "general_operand" " w,w,m"))]
7945 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7946 && (register_operand (operands[
0], <MODE>mode)
7947 || register_operand (operands[
1], <MODE>mode))"
7952 [(set_attr "type" "multiple,neon_stp,neon_ldp")
7953 (set_attr "length" "
8,
4,
4")]
7956 (define_insn "*aarch64_be_mov<mode>"
7957 [(set (match_operand:VSTRUCT_2Q
0 "nonimmediate_operand" "=w,m,w")
7958 (match_operand:VSTRUCT_2Q
1 "general_operand" " w,w,m"))]
7960 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7961 && (register_operand (operands[
0], <MODE>mode)
7962 || register_operand (operands[
1], <MODE>mode))"
7967 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7968 (set_attr "arch" "simd,*,*")
7969 (set_attr "length" "
8,
4,
4")]
7972 (define_insn "*aarch64_be_movoi"
7973 [(set (match_operand:OI
0 "nonimmediate_operand" "=w,m,w")
7974 (match_operand:OI
1 "general_operand" " w,w,m"))]
7976 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7977 && (register_operand (operands[
0], OImode)
7978 || register_operand (operands[
1], OImode))"
7983 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7984 (set_attr "arch" "simd,*,*")
7985 (set_attr "length" "
8,
4,
4")]
7988 (define_insn "*aarch64_be_mov<mode>"
7989 [(set (match_operand:VSTRUCT_3QD
0 "nonimmediate_operand" "=w,o,w")
7990 (match_operand:VSTRUCT_3QD
1 "general_operand" " w,w,o"))]
7992 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7993 && (register_operand (operands[
0], <MODE>mode)
7994 || register_operand (operands[
1], <MODE>mode))"
7996 [(set_attr "type" "multiple")
7997 (set_attr "arch" "fp<q>,*,*")
7998 (set_attr "length" "
12,
8,
8")]
8001 (define_insn "*aarch64_be_movci"
8002 [(set (match_operand:CI
0 "nonimmediate_operand" "=w,o,w")
8003 (match_operand:CI
1 "general_operand" " w,w,o"))]
8005 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8006 && (register_operand (operands[
0], CImode)
8007 || register_operand (operands[
1], CImode))"
8009 [(set_attr "type" "multiple")
8010 (set_attr "arch" "simd,*,*")
8011 (set_attr "length" "
12,
8,
8")]
8014 (define_insn "*aarch64_be_mov<mode>"
8015 [(set (match_operand:VSTRUCT_4QD
0 "nonimmediate_operand" "=w,o,w")
8016 (match_operand:VSTRUCT_4QD
1 "general_operand" " w,w,o"))]
8018 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8019 && (register_operand (operands[
0], <MODE>mode)
8020 || register_operand (operands[
1], <MODE>mode))"
8022 [(set_attr "type" "multiple")
8023 (set_attr "arch" "fp<q>,*,*")
8024 (set_attr "length" "
16,
8,
8")]
8027 (define_insn "*aarch64_be_movxi"
8028 [(set (match_operand:XI
0 "nonimmediate_operand" "=w,o,w")
8029 (match_operand:XI
1 "general_operand" " w,w,o"))]
8031 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8032 && (register_operand (operands[
0], XImode)
8033 || register_operand (operands[
1], XImode))"
8035 [(set_attr "type" "multiple")
8036 (set_attr "arch" "simd,*,*")
8037 (set_attr "length" "
16,
8,
8")]
8041 [(set (match_operand:VSTRUCT_2QD
0 "register_operand")
8042 (match_operand:VSTRUCT_2QD
1 "register_operand"))]
8043 "TARGET_FLOAT && reload_completed"
8046 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode,
2);
8051 [(set (match_operand:OI
0 "register_operand")
8052 (match_operand:OI
1 "register_operand"))]
8053 "TARGET_FLOAT && reload_completed"
8056 aarch64_simd_emit_reg_reg_move (operands, TImode,
2);
8061 [(set (match_operand:VSTRUCT_3QD
0 "nonimmediate_operand")
8062 (match_operand:VSTRUCT_3QD
1 "general_operand"))]
8063 "TARGET_FLOAT && reload_completed"
8066 if (register_operand (operands[
0], <MODE>mode)
8067 && register_operand (operands[
1], <MODE>mode))
8069 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode,
3);
8072 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8074 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8075 machine_mode pair_mode = elt_size ==
16 ? V2x16QImode : V2x8QImode;
8076 emit_move_insn (simplify_gen_subreg (pair_mode, operands[
0],
8078 simplify_gen_subreg (pair_mode, operands[
1],
8080 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8081 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8085 gen_lowpart (<VSTRUCT_ELT>mode,
8086 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8097 [(set (match_operand:CI
0 "nonimmediate_operand")
8098 (match_operand:CI
1 "general_operand"))]
8099 "TARGET_FLOAT && reload_completed"
8102 if (register_operand (operands[
0], CImode)
8103 && register_operand (operands[
1], CImode))
8105 aarch64_simd_emit_reg_reg_move (operands, TImode,
3);
8108 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8110 emit_move_insn (simplify_gen_subreg (OImode, operands[
0], CImode,
0),
8111 simplify_gen_subreg (OImode, operands[
1], CImode,
0));
8112 emit_move_insn (gen_lowpart (V16QImode,
8113 simplify_gen_subreg (TImode, operands[
0],
8115 gen_lowpart (V16QImode,
8116 simplify_gen_subreg (TImode, operands[
1],
8125 [(set (match_operand:VSTRUCT_4QD
0 "nonimmediate_operand")
8126 (match_operand:VSTRUCT_4QD
1 "general_operand"))]
8127 "TARGET_FLOAT && reload_completed"
8130 if (register_operand (operands[
0], <MODE>mode)
8131 && register_operand (operands[
1], <MODE>mode))
8133 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode,
4);
8136 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8138 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8139 machine_mode pair_mode = elt_size ==
16 ? V2x16QImode : V2x8QImode;
8140 emit_move_insn (simplify_gen_subreg (pair_mode, operands[
0],
8142 simplify_gen_subreg (pair_mode, operands[
1],
8144 emit_move_insn (simplify_gen_subreg (pair_mode, operands[
0],
8145 <MODE>mode,
2 * elt_size),
8146 simplify_gen_subreg (pair_mode, operands[
1],
8147 <MODE>mode,
2 * elt_size));
8155 [(set (match_operand:XI
0 "nonimmediate_operand")
8156 (match_operand:XI
1 "general_operand"))]
8157 "TARGET_FLOAT && reload_completed"
8160 if (register_operand (operands[
0], XImode)
8161 && register_operand (operands[
1], XImode))
8163 aarch64_simd_emit_reg_reg_move (operands, TImode,
4);
8166 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8168 emit_move_insn (simplify_gen_subreg (OImode, operands[
0], XImode,
0),
8169 simplify_gen_subreg (OImode, operands[
1], XImode,
0));
8170 emit_move_insn (simplify_gen_subreg (OImode, operands[
0], XImode,
32),
8171 simplify_gen_subreg (OImode, operands[
1], XImode,
32));
8179 [(set (match_operand:V8DI
0 "nonimmediate_operand")
8180 (match_operand:V8DI
1 "general_operand"))]
8184 if (register_operand (operands[
0], V8DImode)
8185 && register_operand (operands[
1], V8DImode))
8187 aarch64_simd_emit_reg_reg_move (operands, DImode,
8);
8190 else if ((register_operand (operands[
0], V8DImode)
8191 && memory_operand (operands[
1], V8DImode))
8192 || (memory_operand (operands[
0], V8DImode)
8193 && register_operand (operands[
1], V8DImode)))
8195 for (int offset =
0; offset <
64; offset +=
16)
8196 emit_move_insn (simplify_gen_subreg (TImode, operands[
0],
8198 simplify_gen_subreg (TImode, operands[
1],
8206 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8207 [(match_operand:VSTRUCT_QD
0 "register_operand")
8208 (match_operand:DI
1 "register_operand")]
8211 rtx mem = gen_rtx_MEM (BLKmode, operands[
1]);
8212 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8214 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[
0], mem));
8218 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8219 [(set (match_operand:VSTRUCT_2DNX
0 "register_operand" "=w")
8220 (unspec:VSTRUCT_2DNX [
8221 (match_operand:VSTRUCT_2DNX
1 "aarch64_simd_struct_operand" "Utv")]
8224 "ld2
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
8225 [(set_attr "type" "neon_load2_2reg<q>")]
8228 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8229 [(set (match_operand:VSTRUCT_2DX
0 "register_operand" "=w")
8230 (unspec:VSTRUCT_2DX [
8231 (match_operand:VSTRUCT_2DX
1 "aarch64_simd_struct_operand" "Utv")]
8234 "ld1
\\t{%S0.1d - %T0.1d}, %
1"
8235 [(set_attr "type" "neon_load1_2reg<q>")]
8238 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8239 [(set (match_operand:VSTRUCT_3DNX
0 "register_operand" "=w")
8240 (unspec:VSTRUCT_3DNX [
8241 (match_operand:VSTRUCT_3DNX
1 "aarch64_simd_struct_operand" "Utv")]
8244 "ld3
\\t{%S0.<Vtype> - %U0.<Vtype>}, %
1"
8245 [(set_attr "type" "neon_load3_3reg<q>")]
8248 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8249 [(set (match_operand:VSTRUCT_3DX
0 "register_operand" "=w")
8250 (unspec:VSTRUCT_3DX [
8251 (match_operand:VSTRUCT_3DX
1 "aarch64_simd_struct_operand" "Utv")]
8254 "ld1
\\t{%S0.1d - %U0.1d}, %
1"
8255 [(set_attr "type" "neon_load1_3reg<q>")]
8258 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8259 [(set (match_operand:VSTRUCT_4DNX
0 "register_operand" "=w")
8260 (unspec:VSTRUCT_4DNX [
8261 (match_operand:VSTRUCT_4DNX
1 "aarch64_simd_struct_operand" "Utv")]
8264 "ld4
\\t{%S0.<Vtype> - %V0.<Vtype>}, %
1"
8265 [(set_attr "type" "neon_load4_4reg<q>")]
8268 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8269 [(set (match_operand:VSTRUCT_4DX
0 "register_operand" "=w")
8270 (unspec:VSTRUCT_4DX [
8271 (match_operand:VSTRUCT_4DX
1 "aarch64_simd_struct_operand" "Utv")]
8274 "ld1
\\t{%S0.1d - %V0.1d}, %
1"
8275 [(set_attr "type" "neon_load1_4reg<q>")]
8278 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8279 [(match_operand:VSTRUCT_D
0 "register_operand")
8280 (match_operand:DI
1 "register_operand")]
8283 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
1]);
8284 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[
0], mem));
8288 (define_expand "aarch64_ld1<VALL_F16:mode>"
8289 [(match_operand:VALL_F16
0 "register_operand")
8290 (match_operand:DI
1 "register_operand")]
8293 machine_mode mode = <VALL_F16:MODE>mode;
8294 rtx mem = gen_rtx_MEM (mode, operands[
1]);
8296 if (BYTES_BIG_ENDIAN)
8297 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[
0], mem));
8299 emit_move_insn (operands[
0], mem);
8303 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8304 [(match_operand:VSTRUCT_Q
0 "register_operand")
8305 (match_operand:DI
1 "register_operand")]
8308 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
1]);
8309 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[
0], mem));
8313 (define_expand "aarch64_ld1x2<vstruct_elt>"
8314 [(match_operand:VSTRUCT_2QD
0 "register_operand")
8315 (match_operand:DI
1 "register_operand")]
8318 machine_mode mode = <MODE>mode;
8319 rtx mem = gen_rtx_MEM (mode, operands[
1]);
8321 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[
0], mem));
8325 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
8326 [(match_operand:VSTRUCT_QD
0 "register_operand")
8327 (match_operand:DI
1 "register_operand")
8328 (match_operand:VSTRUCT_QD
2 "register_operand")
8329 (match_operand:SI
3 "immediate_operand")]
8332 rtx mem = gen_rtx_MEM (BLKmode, operands[
1]);
8333 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8335 aarch64_simd_lane_bounds (operands[
3],
0,
8336 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8337 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[
0],
8338 mem, operands[
2], operands[
3]));
8342 ;; Permuted-store expanders for neon intrinsics.
8344 ;; Permute instructions
8348 (define_expand "vec_perm<mode>"
8349 [(match_operand:VB
0 "register_operand")
8350 (match_operand:VB
1 "register_operand")
8351 (match_operand:VB
2 "register_operand")
8352 (match_operand:VB
3 "register_operand")]
8355 aarch64_expand_vec_perm (operands[
0], operands[
1],
8356 operands[
2], operands[
3], <nunits>);
8360 (define_insn "aarch64_qtbl1<mode>"
8361 [(set (match_operand:VB
0 "register_operand" "=w")
8362 (unspec:VB [(match_operand:V16QI
1 "register_operand" "w")
8363 (match_operand:VB
2 "register_operand" "w")]
8366 "tbl
\\t%
0.<Vtype>, {%
1.16b}, %
2.<Vtype>"
8367 [(set_attr "type" "neon_tbl1<q>")]
8370 (define_insn "aarch64_qtbx1<mode>"
8371 [(set (match_operand:VB
0 "register_operand" "=w")
8372 (unspec:VB [(match_operand:VB
1 "register_operand" "
0")
8373 (match_operand:V16QI
2 "register_operand" "w")
8374 (match_operand:VB
3 "register_operand" "w")]
8377 "tbx
\\t%
0.<Vtype>, {%
2.16b}, %
3.<Vtype>"
8378 [(set_attr "type" "neon_tbl1<q>")]
8381 ;; Two source registers.
8383 (define_insn "aarch64_qtbl2<mode>"
8384 [(set (match_operand:VB
0 "register_operand" "=w")
8385 (unspec:VB [(match_operand:V2x16QI
1 "register_operand" "w")
8386 (match_operand:VB
2 "register_operand" "w")]
8389 "tbl
\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8390 [(set_attr "type" "neon_tbl2")]
8393 (define_insn "aarch64_qtbx2<mode>"
8394 [(set (match_operand:VB
0 "register_operand" "=w")
8395 (unspec:VB [(match_operand:VB
1 "register_operand" "
0")
8396 (match_operand:V2x16QI
2 "register_operand" "w")
8397 (match_operand:VB
3 "register_operand" "w")]
8400 "tbx
\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8401 [(set_attr "type" "neon_tbl2")]
8404 ;; Three source registers.
8406 (define_insn "aarch64_qtbl3<mode>"
8407 [(set (match_operand:VB
0 "register_operand" "=w")
8408 (unspec:VB [(match_operand:V3x16QI
1 "register_operand" "w")
8409 (match_operand:VB
2 "register_operand" "w")]
8412 "tbl
\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8413 [(set_attr "type" "neon_tbl3")]
8416 (define_insn "aarch64_qtbx3<mode>"
8417 [(set (match_operand:VB
0 "register_operand" "=w")
8418 (unspec:VB [(match_operand:VB
1 "register_operand" "
0")
8419 (match_operand:V3x16QI
2 "register_operand" "w")
8420 (match_operand:VB
3 "register_operand" "w")]
8423 "tbx
\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8424 [(set_attr "type" "neon_tbl3")]
8427 ;; Four source registers.
8429 (define_insn "aarch64_qtbl4<mode>"
8430 [(set (match_operand:VB
0 "register_operand" "=w")
8431 (unspec:VB [(match_operand:V4x16QI
1 "register_operand" "w")
8432 (match_operand:VB
2 "register_operand" "w")]
8435 "tbl
\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8436 [(set_attr "type" "neon_tbl4")]
8439 (define_insn "aarch64_qtbx4<mode>"
8440 [(set (match_operand:VB
0 "register_operand" "=w")
8441 (unspec:VB [(match_operand:VB
1 "register_operand" "
0")
8442 (match_operand:V4x16QI
2 "register_operand" "w")
8443 (match_operand:VB
3 "register_operand" "w")]
8446 "tbx
\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8447 [(set_attr "type" "neon_tbl4")]
8450 (define_insn_and_split "aarch64_combinev16qi"
8451 [(set (match_operand:V2x16QI
0 "register_operand" "=w")
8452 (unspec:V2x16QI [(match_operand:V16QI
1 "register_operand" "w")
8453 (match_operand:V16QI
2 "register_operand" "w")]
8457 "&& reload_completed"
8460 aarch64_split_combinev16qi (operands);
8463 [(set_attr "type" "multiple")]
8466 ;; This instruction's pattern is generated directly by
8467 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8468 ;; need corresponding changes there.
8469 (define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8470 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
8471 (unspec:VALL_F16 [(match_operand:VALL_F16
1 "register_operand" "w")
8472 (match_operand:VALL_F16
2 "register_operand" "w")]
8475 "<PERMUTE:perm_insn>
\\t%
0.<Vtype>, %
1.<Vtype>, %
2.<Vtype>"
8476 [(set_attr "type" "neon_permute<q>")]
8479 ;; This instruction's pattern is generated directly by
8480 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8481 ;; need corresponding changes there. Note that the immediate (third)
8482 ;; operand is a lane index not a byte index.
8483 (define_insn "aarch64_ext<mode>"
8484 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
8485 (unspec:VALL_F16 [(match_operand:VALL_F16
1 "register_operand" "w")
8486 (match_operand:VALL_F16
2 "register_operand" "w")
8487 (match_operand:SI
3 "immediate_operand" "i")]
8491 operands[
3] = GEN_INT (INTVAL (operands[
3])
8492 * GET_MODE_UNIT_SIZE (<MODE>mode));
8493 return "ext
\\t%
0.<Vbtype>, %
1.<Vbtype>, %
2.<Vbtype>, #%
3";
8495 [(set_attr "type" "neon_ext<q>")]
8498 ;; This instruction's pattern is generated directly by
8499 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8500 ;; need corresponding changes there.
8501 (define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8502 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
8503 (unspec:VALL_F16 [(match_operand:VALL_F16
1 "register_operand" "w")]
8506 "rev<REVERSE:rev_op>
\\t%
0.<Vtype>, %
1.<Vtype>"
8507 [(set_attr "type" "neon_rev<q>")]
8510 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8511 [(set (match_operand:VSTRUCT_2DNX
0 "aarch64_simd_struct_operand" "=Utv")
8512 (unspec:VSTRUCT_2DNX [
8513 (match_operand:VSTRUCT_2DNX
1 "register_operand" "w")]
8516 "st2
\\t{%S1.<Vtype> - %T1.<Vtype>}, %
0"
8517 [(set_attr "type" "neon_store2_2reg")]
8520 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8521 [(set (match_operand:VSTRUCT_2DX
0 "aarch64_simd_struct_operand" "=Utv")
8522 (unspec:VSTRUCT_2DX [
8523 (match_operand:VSTRUCT_2DX
1 "register_operand" "w")]
8526 "st1
\\t{%S1.1d - %T1.1d}, %
0"
8527 [(set_attr "type" "neon_store1_2reg")]
8530 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8531 [(set (match_operand:VSTRUCT_3DNX
0 "aarch64_simd_struct_operand" "=Utv")
8532 (unspec:VSTRUCT_3DNX [
8533 (match_operand:VSTRUCT_3DNX
1 "register_operand" "w")]
8536 "st3
\\t{%S1.<Vtype> - %U1.<Vtype>}, %
0"
8537 [(set_attr "type" "neon_store3_3reg")]
8540 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8541 [(set (match_operand:VSTRUCT_3DX
0 "aarch64_simd_struct_operand" "=Utv")
8542 (unspec:VSTRUCT_3DX [
8543 (match_operand:VSTRUCT_3DX
1 "register_operand" "w")]
8546 "st1
\\t{%S1.1d - %U1.1d}, %
0"
8547 [(set_attr "type" "neon_store1_3reg")]
8550 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8551 [(set (match_operand:VSTRUCT_4DNX
0 "aarch64_simd_struct_operand" "=Utv")
8552 (unspec:VSTRUCT_4DNX [
8553 (match_operand:VSTRUCT_4DNX
1 "register_operand" "w")]
8556 "st4
\\t{%S1.<Vtype> - %V1.<Vtype>}, %
0"
8557 [(set_attr "type" "neon_store4_4reg")]
8560 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8561 [(set (match_operand:VSTRUCT_4DX
0 "aarch64_simd_struct_operand" "=Utv")
8562 (unspec:VSTRUCT_4DX [
8563 (match_operand:VSTRUCT_4DX
1 "register_operand" "w")]
8566 "st1
\\t{%S1.1d - %V1.1d}, %
0"
8567 [(set_attr "type" "neon_store1_4reg")]
8570 (define_expand "aarch64_st<nregs><vstruct_elt>"
8571 [(match_operand:DI
0 "register_operand")
8572 (match_operand:VSTRUCT_D
1 "register_operand")]
8575 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
0]);
8576 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[
1]));
8580 (define_expand "aarch64_st<nregs><vstruct_elt>"
8581 [(match_operand:DI
0 "register_operand")
8582 (match_operand:VSTRUCT_Q
1 "register_operand")]
8585 rtx mem = gen_rtx_MEM (<MODE>mode, operands[
0]);
8586 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[
1]));
8590 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
8591 [(match_operand:DI
0 "register_operand")
8592 (match_operand:VSTRUCT_QD
1 "register_operand")
8593 (match_operand:SI
2 "immediate_operand")]
8596 rtx mem = gen_rtx_MEM (BLKmode, operands[
0]);
8597 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8599 aarch64_simd_lane_bounds (operands[
2],
0,
8600 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8601 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8602 operands[
1], operands[
2]));
8606 (define_expand "aarch64_st1<VALL_F16:mode>"
8607 [(match_operand:DI
0 "register_operand")
8608 (match_operand:VALL_F16
1 "register_operand")]
8611 machine_mode mode = <VALL_F16:MODE>mode;
8612 rtx mem = gen_rtx_MEM (mode, operands[
0]);
8614 if (BYTES_BIG_ENDIAN)
8615 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[
1]));
8617 emit_move_insn (mem, operands[
1]);
8621 ;; Standard pattern name vec_init<mode><Vel>.
8623 (define_expand "vec_init<mode><Vel>"
8624 [(match_operand:VALL_F16
0 "register_operand")
8625 (match_operand
1 "" "")]
8628 aarch64_expand_vector_init (operands[
0], operands[
1]);
8632 (define_expand "vec_init<mode><Vhalf>"
8633 [(match_operand:VQ_NO2E
0 "register_operand")
8634 (match_operand
1 "" "")]
8637 aarch64_expand_vector_init (operands[
0], operands[
1]);
8641 (define_insn "*aarch64_simd_ld1r<mode>"
8642 [(set (match_operand:VALL_F16
0 "register_operand" "=w")
8643 (vec_duplicate:VALL_F16
8644 (match_operand:<VEL>
1 "aarch64_simd_struct_operand" "Utv")))]
8646 "ld1r
\\t{%
0.<Vtype>}, %
1"
8647 [(set_attr "type" "neon_load1_all_lanes")]
8650 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8651 [(set (match_operand:VSTRUCT_2QD
0 "register_operand" "=w")
8652 (unspec:VSTRUCT_2QD [
8653 (match_operand:VSTRUCT_2QD
1 "aarch64_simd_struct_operand" "Utv")]
8656 "ld1
\\t{%S0.<Vtype> - %T0.<Vtype>}, %
1"
8657 [(set_attr "type" "neon_load1_2reg<q>")]
8661 (define_insn "@aarch64_frecpe<mode>"
8662 [(set (match_operand:VHSDF_HSDF
0 "register_operand" "=w")
8664 [(match_operand:VHSDF_HSDF
1 "register_operand" "w")]
8667 "frecpe
\t%<v>
0<Vmtype>, %<v>
1<Vmtype>"
8668 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8671 (define_insn "aarch64_frecpx<mode>"
8672 [(set (match_operand:GPF_F16
0 "register_operand" "=w")
8673 (unspec:GPF_F16 [(match_operand:GPF_F16
1 "register_operand" "w")]
8676 "frecpx
\t%<s>
0, %<s>
1"
8677 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8680 (define_insn "@aarch64_frecps<mode>"
8681 [(set (match_operand:VHSDF_HSDF
0 "register_operand" "=w")
8683 [(match_operand:VHSDF_HSDF
1 "register_operand" "w")
8684 (match_operand:VHSDF_HSDF
2 "register_operand" "w")]
8687 "frecps
\\t%<v>
0<Vmtype>, %<v>
1<Vmtype>, %<v>
2<Vmtype>"
8688 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8691 (define_insn "aarch64_urecpe<mode>"
8692 [(set (match_operand:VDQ_SI
0 "register_operand" "=w")
8693 (unspec:VDQ_SI [(match_operand:VDQ_SI
1 "register_operand" "w")]
8696 "urecpe
\\t%
0.<Vtype>, %
1.<Vtype>"
8697 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8699 ;; Standard pattern name vec_extract<mode><Vel>.
8701 (define_expand "vec_extract<mode><Vel>"
8702 [(match_operand:<VEL>
0 "aarch64_simd_nonimmediate_operand")
8703 (match_operand:VALL_F16
1 "register_operand")
8704 (match_operand:SI
2 "immediate_operand")]
8708 (gen_aarch64_get_lane<mode> (operands[
0], operands[
1], operands[
2]));
8712 ;; Extract a
64-bit vector from one half of a
128-bit vector.
8713 (define_expand "vec_extract<mode><Vhalf>"
8714 [(match_operand:<VHALF>
0 "register_operand")
8715 (match_operand:VQMOV_NO2E
1 "register_operand")
8716 (match_operand
2 "immediate_operand")]
8719 int start = INTVAL (operands[
2]);
8720 gcc_assert (start ==
0 || start ==
1);
8721 start *= <nunits> /
2;
8722 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> /
2, start,
1);
8723 emit_insn (gen_aarch64_get_half<mode> (operands[
0], operands[
1], sel));
8727 ;; Extract a single-element
64-bit vector from one half of a
128-bit vector.
8728 (define_expand "vec_extract<mode><V1half>"
8729 [(match_operand:<V1HALF>
0 "register_operand")
8730 (match_operand:VQ_2E
1 "register_operand")
8731 (match_operand
2 "immediate_operand")]
8734 /* V1DI and V1DF are rarely used by other patterns, so it should be better
8735 to hide it in a subreg destination of a normal DI or DF op. */
8736 rtx scalar0 = gen_lowpart (<VHALF>mode, operands[
0]);
8737 emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[
1], operands[
2]));
8743 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8744 [(set (match_operand:V16QI
0 "register_operand" "=w")
8747 (match_operand:V16QI
1 "register_operand" "%
0")
8748 (match_operand:V16QI
2 "register_operand" "w"))]
8751 "aes<aes_op>
\\t%
0.16b, %
2.16b"
8752 [(set_attr "type" "crypto_aese")]
8755 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8756 [(set (match_operand:V16QI
0 "register_operand" "=w")
8757 (unspec:V16QI [(match_operand:V16QI
1 "register_operand" "w")]
8760 "aes<aesmc_op>
\\t%
0.16b, %
1.16b"
8761 [(set_attr "type" "crypto_aesmc")]
8764 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8765 ;; and enforce the register dependency without scheduling or register
8766 ;; allocation messing up the order or introducing moves inbetween.
8767 ;; Mash the two together during combine.
8769 (define_insn "*aarch64_crypto_aese_fused"
8770 [(set (match_operand:V16QI
0 "register_operand" "=w")
8774 (match_operand:V16QI
1 "register_operand" "%
0")
8775 (match_operand:V16QI
2 "register_operand" "w"))]
8779 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8780 "aese
\\t%
0.16b, %
2.16b\;aesmc
\\t%
0.16b, %
0.16b"
8781 [(set_attr "type" "crypto_aese")
8782 (set_attr "length" "
8")]
8785 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8786 ;; and enforce the register dependency without scheduling or register
8787 ;; allocation messing up the order or introducing moves inbetween.
8788 ;; Mash the two together during combine.
8790 (define_insn "*aarch64_crypto_aesd_fused"
8791 [(set (match_operand:V16QI
0 "register_operand" "=w")
8795 (match_operand:V16QI
1 "register_operand" "%
0")
8796 (match_operand:V16QI
2 "register_operand" "w"))]
8800 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8801 "aesd
\\t%
0.16b, %
2.16b\;aesimc
\\t%
0.16b, %
0.16b"
8802 [(set_attr "type" "crypto_aese")
8803 (set_attr "length" "
8")]
8808 (define_insn "aarch64_crypto_sha1hsi"
8809 [(set (match_operand:SI
0 "register_operand" "=w")
8810 (unspec:SI [(match_operand:SI
1
8811 "register_operand" "w")]
8815 [(set_attr "type" "crypto_sha1_fast")]
8818 (define_insn "aarch64_crypto_sha1hv4si"
8819 [(set (match_operand:SI
0 "register_operand" "=w")
8820 (unspec:SI [(vec_select:SI (match_operand:V4SI
1 "register_operand" "w")
8821 (parallel [(const_int
0)]))]
8823 "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8825 [(set_attr "type" "crypto_sha1_fast")]
8828 (define_insn "aarch64_be_crypto_sha1hv4si"
8829 [(set (match_operand:SI
0 "register_operand" "=w")
8830 (unspec:SI [(vec_select:SI (match_operand:V4SI
1 "register_operand" "w")
8831 (parallel [(const_int
3)]))]
8833 "TARGET_SHA2 && BYTES_BIG_ENDIAN"
8835 [(set_attr "type" "crypto_sha1_fast")]
8838 (define_insn "aarch64_crypto_sha1su1v4si"
8839 [(set (match_operand:V4SI
0 "register_operand" "=w")
8840 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
8841 (match_operand:V4SI
2 "register_operand" "w")]
8844 "sha1su1
\\t%
0.4s, %
2.4s"
8845 [(set_attr "type" "crypto_sha1_fast")]
8848 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8849 [(set (match_operand:V4SI
0 "register_operand" "=w")
8850 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
8851 (match_operand:SI
2 "register_operand" "w")
8852 (match_operand:V4SI
3 "register_operand" "w")]
8855 "sha1<sha1_op>
\\t%q0, %s2, %
3.4s"
8856 [(set_attr "type" "crypto_sha1_slow")]
8859 (define_insn "aarch64_crypto_sha1su0v4si"
8860 [(set (match_operand:V4SI
0 "register_operand" "=w")
8861 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
8862 (match_operand:V4SI
2 "register_operand" "w")
8863 (match_operand:V4SI
3 "register_operand" "w")]
8866 "sha1su0
\\t%
0.4s, %
2.4s, %
3.4s"
8867 [(set_attr "type" "crypto_sha1_xor")]
8872 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8873 [(set (match_operand:V4SI
0 "register_operand" "=w")
8874 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
8875 (match_operand:V4SI
2 "register_operand" "w")
8876 (match_operand:V4SI
3 "register_operand" "w")]
8879 "sha256h<sha256_op>
\\t%q0, %q2, %
3.4s"
8880 [(set_attr "type" "crypto_sha256_slow")]
8883 (define_insn "aarch64_crypto_sha256su0v4si"
8884 [(set (match_operand:V4SI
0 "register_operand" "=w")
8885 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
8886 (match_operand:V4SI
2 "register_operand" "w")]
8889 "sha256su0
\\t%
0.4s, %
2.4s"
8890 [(set_attr "type" "crypto_sha256_fast")]
8893 (define_insn "aarch64_crypto_sha256su1v4si"
8894 [(set (match_operand:V4SI
0 "register_operand" "=w")
8895 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
8896 (match_operand:V4SI
2 "register_operand" "w")
8897 (match_operand:V4SI
3 "register_operand" "w")]
8900 "sha256su1
\\t%
0.4s, %
2.4s, %
3.4s"
8901 [(set_attr "type" "crypto_sha256_slow")]
8906 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8907 [(set (match_operand:V2DI
0 "register_operand" "=w")
8908 (unspec:V2DI [(match_operand:V2DI
1 "register_operand" "
0")
8909 (match_operand:V2DI
2 "register_operand" "w")
8910 (match_operand:V2DI
3 "register_operand" "w")]
8913 "sha512h<sha512_op>
\\t%q0, %q2, %
3.2d"
8914 [(set_attr "type" "crypto_sha512")]
8917 (define_insn "aarch64_crypto_sha512su0qv2di"
8918 [(set (match_operand:V2DI
0 "register_operand" "=w")
8919 (unspec:V2DI [(match_operand:V2DI
1 "register_operand" "
0")
8920 (match_operand:V2DI
2 "register_operand" "w")]
8923 "sha512su0
\\t%
0.2d, %
2.2d"
8924 [(set_attr "type" "crypto_sha512")]
8927 (define_insn "aarch64_crypto_sha512su1qv2di"
8928 [(set (match_operand:V2DI
0 "register_operand" "=w")
8929 (unspec:V2DI [(match_operand:V2DI
1 "register_operand" "
0")
8930 (match_operand:V2DI
2 "register_operand" "w")
8931 (match_operand:V2DI
3 "register_operand" "w")]
8934 "sha512su1
\\t%
0.2d, %
2.2d, %
3.2d"
8935 [(set_attr "type" "crypto_sha512")]
8940 (define_insn "eor3q<mode>
4"
8941 [(set (match_operand:VQ_I
0 "register_operand" "=w")
8944 (match_operand:VQ_I
2 "register_operand" "w")
8945 (match_operand:VQ_I
3 "register_operand" "w"))
8946 (match_operand:VQ_I
1 "register_operand" "w")))]
8948 "eor3
\\t%
0.16b, %
1.16b, %
2.16b, %
3.16b"
8949 [(set_attr "type" "crypto_sha3")]
8952 (define_insn "aarch64_rax1qv2di"
8953 [(set (match_operand:V2DI
0 "register_operand" "=w")
8956 (match_operand:V2DI
2 "register_operand" "w")
8958 (match_operand:V2DI
1 "register_operand" "w")))]
8960 "rax1
\\t%
0.2d, %
1.2d, %
2.2d"
8961 [(set_attr "type" "crypto_sha3")]
8964 (define_insn "aarch64_xarqv2di"
8965 [(set (match_operand:V2DI
0 "register_operand" "=w")
8968 (match_operand:V2DI
1 "register_operand" "%w")
8969 (match_operand:V2DI
2 "register_operand" "w"))
8970 (match_operand:SI
3 "aarch64_simd_shift_imm_di" "Usd")))]
8972 "xar
\\t%
0.2d, %
1.2d, %
2.2d, %
3"
8973 [(set_attr "type" "crypto_sha3")]
8976 (define_insn "bcaxq<mode>
4"
8977 [(set (match_operand:VQ_I
0 "register_operand" "=w")
8980 (not:VQ_I (match_operand:VQ_I
3 "register_operand" "w"))
8981 (match_operand:VQ_I
2 "register_operand" "w"))
8982 (match_operand:VQ_I
1 "register_operand" "w")))]
8984 "bcax
\\t%
0.16b, %
1.16b, %
2.16b, %
3.16b"
8985 [(set_attr "type" "crypto_sha3")]
8990 (define_insn "aarch64_sm3ss1qv4si"
8991 [(set (match_operand:V4SI
0 "register_operand" "=w")
8992 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "w")
8993 (match_operand:V4SI
2 "register_operand" "w")
8994 (match_operand:V4SI
3 "register_operand" "w")]
8997 "sm3ss1
\\t%
0.4s, %
1.4s, %
2.4s, %
3.4s"
8998 [(set_attr "type" "crypto_sm3")]
9002 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9003 [(set (match_operand:V4SI
0 "register_operand" "=w")
9004 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
9005 (match_operand:V4SI
2 "register_operand" "w")
9006 (match_operand:V4SI
3 "register_operand" "w")
9007 (match_operand:SI
4 "aarch64_imm2" "Ui2")]
9010 "sm3tt<sm3tt_op>
\\t%
0.4s, %
2.4s, %
3.4s[%
4]"
9011 [(set_attr "type" "crypto_sm3")]
9014 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9015 [(set (match_operand:V4SI
0 "register_operand" "=w")
9016 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
9017 (match_operand:V4SI
2 "register_operand" "w")
9018 (match_operand:V4SI
3 "register_operand" "w")]
9021 "sm3partw<sm3part_op>
\\t%
0.4s, %
2.4s, %
3.4s"
9022 [(set_attr "type" "crypto_sm3")]
9027 (define_insn "aarch64_sm4eqv4si"
9028 [(set (match_operand:V4SI
0 "register_operand" "=w")
9029 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "
0")
9030 (match_operand:V4SI
2 "register_operand" "w")]
9033 "sm4e
\\t%
0.4s, %
2.4s"
9034 [(set_attr "type" "crypto_sm4")]
9037 (define_insn "aarch64_sm4ekeyqv4si"
9038 [(set (match_operand:V4SI
0 "register_operand" "=w")
9039 (unspec:V4SI [(match_operand:V4SI
1 "register_operand" "w")
9040 (match_operand:V4SI
2 "register_operand" "w")]
9043 "sm4ekey
\\t%
0.4s, %
1.4s, %
2.4s"
9044 [(set_attr "type" "crypto_sm4")]
9049 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9050 [(set (match_operand:VDQSF
0 "register_operand")
9052 [(match_operand:VDQSF
1 "register_operand")
9053 (match_operand:<VFMLA_W>
2 "register_operand")
9054 (match_operand:<VFMLA_W>
3 "register_operand")]
9058 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9059 <nunits> *
2, false);
9060 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9061 <nunits> *
2, false);
9063 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[
0],
9072 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9073 [(set (match_operand:VDQSF
0 "register_operand")
9075 [(match_operand:VDQSF
1 "register_operand")
9076 (match_operand:<VFMLA_W>
2 "register_operand")
9077 (match_operand:<VFMLA_W>
3 "register_operand")]
9081 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> *
2, true);
9082 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> *
2, true);
9084 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[
0],
9092 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9093 [(set (match_operand:VDQSF
0 "register_operand" "=w")
9096 (vec_select:<VFMLA_SEL_W>
9097 (match_operand:<VFMLA_W>
2 "register_operand" "w")
9098 (match_operand:<VFMLA_W>
4 "vect_par_cnst_lo_half" "")))
9100 (vec_select:<VFMLA_SEL_W>
9101 (match_operand:<VFMLA_W>
3 "register_operand" "w")
9102 (match_operand:<VFMLA_W>
5 "vect_par_cnst_lo_half" "")))
9103 (match_operand:VDQSF
1 "register_operand" "
0")))]
9105 "fmlal
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
9106 [(set_attr "type" "neon_fp_mul_s")]
9109 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9110 [(set (match_operand:VDQSF
0 "register_operand" "=w")
9114 (vec_select:<VFMLA_SEL_W>
9115 (match_operand:<VFMLA_W>
2 "register_operand" "w")
9116 (match_operand:<VFMLA_W>
4 "vect_par_cnst_lo_half" ""))))
9118 (vec_select:<VFMLA_SEL_W>
9119 (match_operand:<VFMLA_W>
3 "register_operand" "w")
9120 (match_operand:<VFMLA_W>
5 "vect_par_cnst_lo_half" "")))
9121 (match_operand:VDQSF
1 "register_operand" "
0")))]
9123 "fmlsl
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
9124 [(set_attr "type" "neon_fp_mul_s")]
9127 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9128 [(set (match_operand:VDQSF
0 "register_operand" "=w")
9131 (vec_select:<VFMLA_SEL_W>
9132 (match_operand:<VFMLA_W>
2 "register_operand" "w")
9133 (match_operand:<VFMLA_W>
4 "vect_par_cnst_hi_half" "")))
9135 (vec_select:<VFMLA_SEL_W>
9136 (match_operand:<VFMLA_W>
3 "register_operand" "w")
9137 (match_operand:<VFMLA_W>
5 "vect_par_cnst_hi_half" "")))
9138 (match_operand:VDQSF
1 "register_operand" "
0")))]
9140 "fmlal2
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
9141 [(set_attr "type" "neon_fp_mul_s")]
9144 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9145 [(set (match_operand:VDQSF
0 "register_operand" "=w")
9149 (vec_select:<VFMLA_SEL_W>
9150 (match_operand:<VFMLA_W>
2 "register_operand" "w")
9151 (match_operand:<VFMLA_W>
4 "vect_par_cnst_hi_half" ""))))
9153 (vec_select:<VFMLA_SEL_W>
9154 (match_operand:<VFMLA_W>
3 "register_operand" "w")
9155 (match_operand:<VFMLA_W>
5 "vect_par_cnst_hi_half" "")))
9156 (match_operand:VDQSF
1 "register_operand" "
0")))]
9158 "fmlsl2
\\t%
0.<nunits>s, %
2.<nunits>h, %
3.<nunits>h"
9159 [(set_attr "type" "neon_fp_mul_s")]
9162 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9163 [(set (match_operand:V2SF
0 "register_operand")
9164 (unspec:V2SF [(match_operand:V2SF
1 "register_operand")
9165 (match_operand:V4HF
2 "register_operand")
9166 (match_operand:V4HF
3 "register_operand")
9167 (match_operand:SI
4 "aarch64_imm2")]
9171 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, false);
9172 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
9174 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[
0],
9183 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9184 [(set (match_operand:V2SF
0 "register_operand")
9185 (unspec:V2SF [(match_operand:V2SF
1 "register_operand")
9186 (match_operand:V4HF
2 "register_operand")
9187 (match_operand:V4HF
3 "register_operand")
9188 (match_operand:SI
4 "aarch64_imm2")]
9192 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, true);
9193 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
9195 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[
0],
9203 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9204 [(set (match_operand:V2SF
0 "register_operand" "=w")
9208 (match_operand:V4HF
2 "register_operand" "w")
9209 (match_operand:V4HF
4 "vect_par_cnst_lo_half" "")))
9213 (match_operand:V4HF
3 "register_operand" "x")
9214 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9215 (match_operand:V2SF
1 "register_operand" "
0")))]
9217 "fmlal
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9218 [(set_attr "type" "neon_fp_mul_s")]
9221 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9222 [(set (match_operand:V2SF
0 "register_operand" "=w")
9227 (match_operand:V4HF
2 "register_operand" "w")
9228 (match_operand:V4HF
4 "vect_par_cnst_lo_half" ""))))
9232 (match_operand:V4HF
3 "register_operand" "x")
9233 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9234 (match_operand:V2SF
1 "register_operand" "
0")))]
9236 "fmlsl
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9237 [(set_attr "type" "neon_fp_mul_s")]
9240 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9241 [(set (match_operand:V2SF
0 "register_operand" "=w")
9245 (match_operand:V4HF
2 "register_operand" "w")
9246 (match_operand:V4HF
4 "vect_par_cnst_hi_half" "")))
9250 (match_operand:V4HF
3 "register_operand" "x")
9251 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9252 (match_operand:V2SF
1 "register_operand" "
0")))]
9254 "fmlal2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9255 [(set_attr "type" "neon_fp_mul_s")]
9258 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9259 [(set (match_operand:V2SF
0 "register_operand" "=w")
9264 (match_operand:V4HF
2 "register_operand" "w")
9265 (match_operand:V4HF
4 "vect_par_cnst_hi_half" ""))))
9269 (match_operand:V4HF
3 "register_operand" "x")
9270 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9271 (match_operand:V2SF
1 "register_operand" "
0")))]
9273 "fmlsl2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9274 [(set_attr "type" "neon_fp_mul_s")]
9277 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
9278 [(set (match_operand:V4SF
0 "register_operand")
9279 (unspec:V4SF [(match_operand:V4SF
1 "register_operand")
9280 (match_operand:V8HF
2 "register_operand")
9281 (match_operand:V8HF
3 "register_operand")
9282 (match_operand:SI
4 "aarch64_lane_imm3")]
9286 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, false);
9287 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
9289 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[
0],
9297 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
9298 [(set (match_operand:V4SF
0 "register_operand")
9299 (unspec:V4SF [(match_operand:V4SF
1 "register_operand")
9300 (match_operand:V8HF
2 "register_operand")
9301 (match_operand:V8HF
3 "register_operand")
9302 (match_operand:SI
4 "aarch64_lane_imm3")]
9306 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, true);
9307 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
9309 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[
0],
9317 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
9318 [(set (match_operand:V4SF
0 "register_operand" "=w")
9322 (match_operand:V8HF
2 "register_operand" "w")
9323 (match_operand:V8HF
4 "vect_par_cnst_lo_half" "")))
9327 (match_operand:V8HF
3 "register_operand" "x")
9328 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9329 (match_operand:V4SF
1 "register_operand" "
0")))]
9331 "fmlal
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9332 [(set_attr "type" "neon_fp_mul_s")]
9335 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
9336 [(set (match_operand:V4SF
0 "register_operand" "=w")
9341 (match_operand:V8HF
2 "register_operand" "w")
9342 (match_operand:V8HF
4 "vect_par_cnst_lo_half" ""))))
9346 (match_operand:V8HF
3 "register_operand" "x")
9347 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9348 (match_operand:V4SF
1 "register_operand" "
0")))]
9350 "fmlsl
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9351 [(set_attr "type" "neon_fp_mul_s")]
9354 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
9355 [(set (match_operand:V4SF
0 "register_operand" "=w")
9359 (match_operand:V8HF
2 "register_operand" "w")
9360 (match_operand:V8HF
4 "vect_par_cnst_hi_half" "")))
9364 (match_operand:V8HF
3 "register_operand" "x")
9365 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9366 (match_operand:V4SF
1 "register_operand" "
0")))]
9368 "fmlal2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9369 [(set_attr "type" "neon_fp_mul_s")]
9372 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
9373 [(set (match_operand:V4SF
0 "register_operand" "=w")
9378 (match_operand:V8HF
2 "register_operand" "w")
9379 (match_operand:V8HF
4 "vect_par_cnst_hi_half" ""))))
9383 (match_operand:V8HF
3 "register_operand" "x")
9384 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9385 (match_operand:V4SF
1 "register_operand" "
0")))]
9387 "fmlsl2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9388 [(set_attr "type" "neon_fp_mul_s")]
9391 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
9392 [(set (match_operand:V2SF
0 "register_operand")
9393 (unspec:V2SF [(match_operand:V2SF
1 "register_operand")
9394 (match_operand:V4HF
2 "register_operand")
9395 (match_operand:V8HF
3 "register_operand")
9396 (match_operand:SI
4 "aarch64_lane_imm3")]
9400 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, false);
9401 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
9403 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[
0],
9412 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
9413 [(set (match_operand:V2SF
0 "register_operand")
9414 (unspec:V2SF [(match_operand:V2SF
1 "register_operand")
9415 (match_operand:V4HF
2 "register_operand")
9416 (match_operand:V8HF
3 "register_operand")
9417 (match_operand:SI
4 "aarch64_lane_imm3")]
9421 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
4, true);
9422 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[
4]));
9424 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[
0],
9433 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
9434 [(set (match_operand:V2SF
0 "register_operand" "=w")
9438 (match_operand:V4HF
2 "register_operand" "w")
9439 (match_operand:V4HF
4 "vect_par_cnst_lo_half" "")))
9443 (match_operand:V8HF
3 "register_operand" "x")
9444 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9445 (match_operand:V2SF
1 "register_operand" "
0")))]
9447 "fmlal
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9448 [(set_attr "type" "neon_fp_mul_s")]
9451 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
9452 [(set (match_operand:V2SF
0 "register_operand" "=w")
9457 (match_operand:V4HF
2 "register_operand" "w")
9458 (match_operand:V4HF
4 "vect_par_cnst_lo_half" ""))))
9462 (match_operand:V8HF
3 "register_operand" "x")
9463 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9464 (match_operand:V2SF
1 "register_operand" "
0")))]
9466 "fmlsl
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9467 [(set_attr "type" "neon_fp_mul_s")]
9470 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
9471 [(set (match_operand:V2SF
0 "register_operand" "=w")
9475 (match_operand:V4HF
2 "register_operand" "w")
9476 (match_operand:V4HF
4 "vect_par_cnst_hi_half" "")))
9480 (match_operand:V8HF
3 "register_operand" "x")
9481 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9482 (match_operand:V2SF
1 "register_operand" "
0")))]
9484 "fmlal2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9485 [(set_attr "type" "neon_fp_mul_s")]
9488 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
9489 [(set (match_operand:V2SF
0 "register_operand" "=w")
9494 (match_operand:V4HF
2 "register_operand" "w")
9495 (match_operand:V4HF
4 "vect_par_cnst_hi_half" ""))))
9499 (match_operand:V8HF
3 "register_operand" "x")
9500 (parallel [(match_operand:SI
5 "aarch64_lane_imm3" "Ui7")]))))
9501 (match_operand:V2SF
1 "register_operand" "
0")))]
9503 "fmlsl2
\\t%
0.2s, %
2.2h, %
3.h[%
5]"
9504 [(set_attr "type" "neon_fp_mul_s")]
9507 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
9508 [(set (match_operand:V4SF
0 "register_operand")
9509 (unspec:V4SF [(match_operand:V4SF
1 "register_operand")
9510 (match_operand:V8HF
2 "register_operand")
9511 (match_operand:V4HF
3 "register_operand")
9512 (match_operand:SI
4 "aarch64_imm2")]
9516 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, false);
9517 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
9519 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[
0],
9527 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
9528 [(set (match_operand:V4SF
0 "register_operand")
9529 (unspec:V4SF [(match_operand:V4SF
1 "register_operand")
9530 (match_operand:V8HF
2 "register_operand")
9531 (match_operand:V4HF
3 "register_operand")
9532 (match_operand:SI
4 "aarch64_imm2")]
9536 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
8, true);
9537 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[
4]));
9539 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[
0],
9547 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
9548 [(set (match_operand:V4SF
0 "register_operand" "=w")
9552 (match_operand:V8HF
2 "register_operand" "w")
9553 (match_operand:V8HF
4 "vect_par_cnst_lo_half" "")))
9557 (match_operand:V4HF
3 "register_operand" "x")
9558 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9559 (match_operand:V4SF
1 "register_operand" "
0")))]
9561 "fmlal
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9562 [(set_attr "type" "neon_fp_mul_s")]
9565 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
9566 [(set (match_operand:V4SF
0 "register_operand" "=w")
9571 (match_operand:V8HF
2 "register_operand" "w")
9572 (match_operand:V8HF
4 "vect_par_cnst_lo_half" ""))))
9576 (match_operand:V4HF
3 "register_operand" "x")
9577 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9578 (match_operand:V4SF
1 "register_operand" "
0")))]
9580 "fmlsl
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9581 [(set_attr "type" "neon_fp_mul_s")]
9584 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
9585 [(set (match_operand:V4SF
0 "register_operand" "=w")
9589 (match_operand:V8HF
2 "register_operand" "w")
9590 (match_operand:V8HF
4 "vect_par_cnst_hi_half" "")))
9594 (match_operand:V4HF
3 "register_operand" "x")
9595 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9596 (match_operand:V4SF
1 "register_operand" "
0")))]
9598 "fmlal2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9599 [(set_attr "type" "neon_fp_mul_s")]
9602 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
9603 [(set (match_operand:V4SF
0 "register_operand" "=w")
9608 (match_operand:V8HF
2 "register_operand" "w")
9609 (match_operand:V8HF
4 "vect_par_cnst_hi_half" ""))))
9613 (match_operand:V4HF
3 "register_operand" "x")
9614 (parallel [(match_operand:SI
5 "aarch64_imm2" "Ui2")]))))
9615 (match_operand:V4SF
1 "register_operand" "
0")))]
9617 "fmlsl2
\\t%
0.4s, %
2.4h, %
3.h[%
5]"
9618 [(set_attr "type" "neon_fp_mul_s")]
9623 (define_insn "aarch64_crypto_pmulldi"
9624 [(set (match_operand:TI
0 "register_operand" "=w")
9625 (unspec:TI [(match_operand:DI
1 "register_operand" "w")
9626 (match_operand:DI
2 "register_operand" "w")]
9629 "pmull
\\t%
0.1q, %
1.1d, %
2.1d"
9630 [(set_attr "type" "crypto_pmull")]
9633 (define_insn "aarch64_crypto_pmullv2di"
9634 [(set (match_operand:TI
0 "register_operand" "=w")
9635 (unspec:TI [(match_operand:V2DI
1 "register_operand" "w")
9636 (match_operand:V2DI
2 "register_operand" "w")]
9639 "pmull2
\\t%
0.1q, %
1.2d, %
2.2d"
9640 [(set_attr "type" "crypto_pmull")]
;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
9644 (define_insn "<optab><Vnarrowq><mode>
2"
9645 [(set (match_operand:VQN
0 "register_operand" "=w")
9646 (ANY_EXTEND:VQN (match_operand:<VNARROWQ>
1 "register_operand" "w")))]
9648 "<su>xtl
\t%
0.<Vtype>, %
1.<Vntype>"
9649 [(set_attr "type" "neon_shift_imm_long")]
9652 (define_expand "aarch64_<su>xtl<mode>"
9653 [(set (match_operand:VQN
0 "register_operand" "=w")
9654 (ANY_EXTEND:VQN (match_operand:<VNARROWQ>
1 "register_operand" "w")))]
;; Truncate a 128-bit integer vector to a 64-bit vector.
9660 (define_insn "trunc<mode><Vnarrowq>
2<vczle><vczbe>"
9661 [(set (match_operand:<VNARROWQ>
0 "register_operand" "=w")
9662 (truncate:<VNARROWQ> (match_operand:VQN
1 "register_operand" "w")))]
9664 "xtn
\t%
0.<Vntype>, %
1.<Vtype>"
9665 [(set_attr "type" "neon_move_narrow_q")]
9668 ;; Expander for the intrinsics that only takes one mode unlike the two-mode
9670 (define_expand "aarch64_xtn<mode>"
9671 [(set (match_operand:<VNARROWQ>
0 "register_operand")
9672 (truncate:<VNARROWQ> (match_operand:VQN
1 "register_operand")))]
9677 (define_insn "aarch64_bfdot<mode>"
9678 [(set (match_operand:VDQSF
0 "register_operand" "=w")
9681 [(match_operand:<VBFMLA_W>
2 "register_operand" "w")
9682 (match_operand:<VBFMLA_W>
3 "register_operand" "w")]
9684 (match_operand:VDQSF
1 "register_operand" "
0")))]
9686 "bfdot
\t%
0.<Vtype>, %
2.<Vbfdottype>, %
3.<Vbfdottype>"
9687 [(set_attr "type" "neon_dot<q>")]
9690 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9691 [(set (match_operand:VDQSF
0 "register_operand" "=w")
9694 [(match_operand:<VDQSF:VBFMLA_W>
2 "register_operand" "w")
9695 (match_operand:VBF
3 "register_operand" "w")
9696 (match_operand:SI
4 "const_int_operand" "n")]
9698 (match_operand:VDQSF
1 "register_operand" "
0")))]
9701 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9702 int lane = INTVAL (operands[
4]);
9703 operands[
4] = gen_int_mode (ENDIAN_LANE_N (nunits /
2, lane), SImode);
9704 return "bfdot
\t%
0.<VDQSF:Vtype>, %
2.<VDQSF:Vbfdottype>, %
3.2h[%
4]";
9706 [(set_attr "type" "neon_dot<VDQSF:q>")]
;; vget_low/high_bf16
9710 (define_expand "aarch64_vget_lo_halfv8bf"
9711 [(match_operand:V4BF
0 "register_operand")
9712 (match_operand:V8BF
1 "register_operand")]
9715 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode,
8, false);
9716 emit_insn (gen_aarch64_get_halfv8bf (operands[
0], operands[
1], p));
9720 (define_expand "aarch64_vget_hi_halfv8bf"
9721 [(match_operand:V4BF
0 "register_operand")
9722 (match_operand:V8BF
1 "register_operand")]
9725 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode,
8, true);
9726 emit_insn (gen_aarch64_get_halfv8bf (operands[
0], operands[
1], p));
9731 (define_insn "aarch64_bfmmlaqv4sf"
9732 [(set (match_operand:V4SF
0 "register_operand" "=w")
9733 (plus:V4SF (match_operand:V4SF
1 "register_operand" "
0")
9734 (unspec:V4SF [(match_operand:V8BF
2 "register_operand" "w")
9735 (match_operand:V8BF
3 "register_operand" "w")]
9738 "bfmmla
\\t%
0.4s, %
2.8h, %
3.8h"
9739 [(set_attr "type" "neon_fp_mla_s_q")]
9743 (define_insn "aarch64_bfmlal<bt>v4sf"
9744 [(set (match_operand:V4SF
0 "register_operand" "=w")
9745 (plus: V4SF (match_operand:V4SF
1 "register_operand" "
0")
9746 (unspec:V4SF [(match_operand:V8BF
2 "register_operand" "w")
9747 (match_operand:V8BF
3 "register_operand" "w")]
9750 "bfmlal<bt>
\\t%
0.4s, %
2.8h, %
3.8h"
9751 [(set_attr "type" "neon_fp_mla_s_q")]
9754 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9755 [(set (match_operand:V4SF
0 "register_operand" "=w")
9756 (plus: V4SF (match_operand:V4SF
1 "register_operand" "
0")
9757 (unspec:V4SF [(match_operand:V8BF
2 "register_operand" "w")
9758 (match_operand:VBF
3 "register_operand" "x")
9759 (match_operand:SI
4 "const_int_operand" "n")]
9763 operands[
4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[
4]));
9764 return "bfmlal<bt>
\\t%
0.4s, %
2.8h, %
3.h[%
4]";
9766 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
;; 8-bit integer matrix multiply-accumulate
9770 (define_insn "aarch64_simd_<sur>mmlav16qi"
9771 [(set (match_operand:V4SI
0 "register_operand" "=w")
9773 (unspec:V4SI [(match_operand:V16QI
2 "register_operand" "w")
9774 (match_operand:V16QI
3 "register_operand" "w")] MATMUL)
9775 (match_operand:V4SI
1 "register_operand" "
0")))]
9777 "<sur>mmla
\\t%
0.4s, %
2.16b, %
3.16b"
9778 [(set_attr "type" "neon_mla_s_q")]
9782 (define_insn "aarch64_bfcvtn<q><mode>"
9783 [(set (match_operand:V4SF_TO_BF
0 "register_operand" "=w")
9784 (unspec:V4SF_TO_BF [(match_operand:V4SF
1 "register_operand" "w")]
9787 "bfcvtn
\\t%
0.4h, %
1.4s"
9788 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9791 (define_insn "aarch64_bfcvtn2v8bf"
9792 [(set (match_operand:V8BF
0 "register_operand" "=w")
9793 (unspec:V8BF [(match_operand:V8BF
1 "register_operand" "
0")
9794 (match_operand:V4SF
2 "register_operand" "w")]
9797 "bfcvtn2
\\t%
0.8h, %
2.4s"
9798 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9801 (define_insn "aarch64_bfcvtbf"
9802 [(set (match_operand:BF
0 "register_operand" "=w")
9803 (unspec:BF [(match_operand:SF
1 "register_operand" "w")]
9807 [(set_attr "type" "f_cvt")]
;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
9811 (define_insn "aarch64_vbfcvt<mode>"
9812 [(set (match_operand:V4SF
0 "register_operand" "=w")
9813 (unspec:V4SF [(match_operand:VBF
1 "register_operand" "w")]
9816 "shll
\\t%
0.4s, %
1.4h, #
16"
9817 [(set_attr "type" "neon_shift_imm_long")]
9820 (define_insn "aarch64_vbfcvt_highv8bf"
9821 [(set (match_operand:V4SF
0 "register_operand" "=w")
9822 (unspec:V4SF [(match_operand:V8BF
1 "register_operand" "w")]
9825 "shll2
\\t%
0.4s, %
1.8h, #
16"
9826 [(set_attr "type" "neon_shift_imm_long")]
9829 (define_insn "aarch64_bfcvtsf"
9830 [(set (match_operand:SF
0 "register_operand" "=w")
9831 (unspec:SF [(match_operand:BF
1 "register_operand" "w")]
9834 "shl
\\t%d0, %d1, #
16"
9835 [(set_attr "type" "neon_shift_imm")]