;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2023 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The following define_subst rules are used to produce patterns representing
;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
;; a vec_concat with zeroes.  The order of the vec_concat operands differs
;; for big-endian so we have a separate define_subst rule for each endianness.
(define_subst "add_vec_concat_subst_le"
  [(set (match_operand:VDZ 0)
        (match_operand:VDZ 1))]
  "!BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_dup 1)
          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])

(define_subst "add_vec_concat_subst_be"
  [(set (match_operand:VDZ 0)
        (match_operand:VDZ 1))]
  "BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
          (match_dup 1)))])
;; The subst_attr definitions below are used to annotate patterns further on
;; in the file.  Patterns that need to have the above substitutions added to
;; them should have <vczle><vczbe> appended to their name.
(define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
(define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
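
;; For example (illustrative sketch, little-endian case), annotating
;; add<mode>3 with <vczle><vczbe> additionally generates a pattern named
;; add<mode>3_vec_concatz_le along the lines of:
;;
;;   (set (reg:V16QI v0)
;;        (vec_concat:V16QI (plus:V8QI (reg:V8QI v1) (reg:V8QI v2))
;;                          (const_vector:V8QI [0 0 0 0 0 0 0 0])))
;;
;; which lets combine fold away an explicit zeroing of the upper half.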

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_FLOAT"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
  "
)
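
;; A minimal C example of the xzr special case above (illustrative; the
;; exact code generated depends on tuning and alignment):
;;
;;   #include <arm_neon.h>
;;   void zero16 (int32x4_t *p) { *p = vdupq_n_s32 (0); }
;;
;; can be emitted as a pair store of xzr, "stp xzr, xzr, [x0]", rather than
;; materialising a zero vector register first.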

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_FLOAT && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vwcore>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w,r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vwcore>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
          "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
        (match_operand:VDMOV 1 "general_operand"
          "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
  "@
   ldr\t%d0, %1
   ldr\t%x0, %1
   str\txzr, %0
   str\t%d1, %0
   str\t%x1, %0
   * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
   * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
   fmov\t%d0, %1
   mov\t%0, %1
   * return aarch64_output_simd_mov_immediate (operands[1], 64);
   fmov\t%d0, xzr"
  [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
                     store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>, f_mcr")
   (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
)

(define_insn "*aarch64_simd_mov<VQMOV:mode>"
  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
          "=w, Umn, m, w, ?r, ?w, ?r, w, w")
        (match_operand:VQMOV 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn, Dz"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
  "@
   ldr\t%q0, %1
   stp\txzr, xzr, %0
   str\t%q1, %0
   mov\t%0.<Vbtype>, %1.<Vbtype>
   #
   #
   #
   * return aarch64_output_simd_mov_immediate (operands[1], 128);
   fmov\t%d0, xzr"
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>, fmov")
   (set_attr "length" "4,4,4,4,8,8,8,4,4")
   (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w,r")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w,r")
        (match_operand:DREG2 3 "memory_operand" "m,m"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "@
   ldp\t%d0, %d2, %z1
   ldp\t%x0, %x2, %z1"
  [(set_attr "type" "neon_ldp,load_16")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
        (match_operand:DREG 1 "register_operand" "w,r"))
   (set (match_operand:DREG2 2 "memory_operand" "=m,m")
        (match_operand:DREG2 3 "register_operand" "w,r"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "@
   stp\t%d1, %d3, %z0
   stp\t%x1, %x3, %z0"
  [(set_attr "type" "neon_stp,store_16")]
)

(define_insn "aarch64_simd_stp<mode>"
  [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn")
        (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand" "w,r")))]
  "TARGET_SIMD"
  "@
   stp\\t%<Vetype>1, %<Vetype>1, %y0
   stp\\t%<vw>1, %<vw>1, %y0"
  [(set_attr "type" "neon_stp, store_<ldpstp_vel_sz>")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
        (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
        (match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %z1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
        (match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %z0"
  [(set_attr "type" "neon_stp_q")]
)


(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
        (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_FLOAT
   && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
        (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_FLOAT
   && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQMOV 0)
        (match_operand:VQMOV 1))]
  "TARGET_FLOAT"
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GP_REGNUM_P (REGNO (src)))
    {
      rtx src_low_part = gen_lowpart (<VHALF>mode, src);
      rtx src_high_part = gen_highpart (<VHALF>mode, src);
      rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);

      emit_move_insn (dst_low_part, src_low_part);
      emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
                                             src_high_part));
    }
  else
    {
      rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
      rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
      rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
      rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
      emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
      emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
    }
  DONE;
}
)

(define_expand "aarch64_get_half<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand")
        (vec_select:<VHALF>
          (match_operand:VQMOV 1 "register_operand")
          (match_operand 2 "ascending_int_parallel")))]
  "TARGET_FLOAT"
{
  if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
    {
      emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
      DONE;
    }
}
)

(define_expand "aarch64_get_low<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_FLOAT"
{
  rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
  DONE;
}
)

(define_expand "aarch64_get_high<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_FLOAT"
{
  rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
  DONE;
}
)

(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD"
  "@
   #
   umov\t%0, %1.d[0]"
  "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
  }
  [(set_attr "type" "mov_reg,neon_to_gp<q>")
   (set_attr "length" "4")]
)

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_FLOAT"
  "@
   dup\t%d0, %1.d[1]
   umov\t%0, %1.d[1]
   fmov\t%0, %1.d[1]"
  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
   (set_attr "arch" "simd,simd,*")
   (set_attr "length" "4")]
)

(define_insn "orn<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; Advanced SIMD does not support vector DImode MUL, but SVE does.
;; Make use of the overlap between Z and V registers to implement the V2DI
;; optab for TARGET_SVE.  The mulvnx2di3 expander can
;; handle the TARGET_SVE2 case transparently.
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand")
        (mult:V2DI (match_operand:V2DI 1 "register_operand")
                   (match_operand:V2DI 2 "aarch64_sve_vsm_operand")))]
  "TARGET_SVE"
{
  machine_mode sve_mode = VNx2DImode;
  rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], V2DImode, 0);
  rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], V2DImode, 0);
  rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], V2DImode, 0);

  emit_insn (gen_mulvnx2di3 (sve_op0, sve_op1, sve_op2));
  DONE;
}
)
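
;; For example (illustrative; assumes SVE is available), a fixed-width
;; two-lane multiply such as:
;;
;;   void f (long *restrict c, long *a, long *b)
;;   { c[0] = a[0] * b[0]; c[1] = a[1] * b[1]; }
;;
;; can be done on the low 128 bits of the Z registers with a single
;; predicated SVE multiply, "mul z0.d, p0/m, z0.d, z1.d", instead of
;; falling back to scalar code.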

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode><vczle><vczbe>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
  rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                           <MODE>mode, 0);
  emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
  DONE;
}
)
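
;; The sequence above computes ctz as clz of the bit-reversed input: the
;; element-wise byte swap (REV) followed by the byte-wise bit reversal
;; (RBIT) reverses all bits of each element.  A scalar C model
;; (illustrative; bitrev32 stands for a hypothetical full 32-bit
;; bit-reverse):
;;
;;   int ctz32 (uint32_t x) { return __builtin_clz (bitrev32 (x)); }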

(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
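
;; A scalar C model of the AND/EOR sequence above (illustrative sketch):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float xorsign (float x, float y)
;;   {
;;     uint32_t xi, yi;
;;     memcpy (&xi, &x, 4);
;;     memcpy (&yi, &y, 4);
;;     xi ^= yi & 0x80000000u;  /* AND with the sign-bit mask, then EOR.  */
;;     memcpy (&x, &xi, 4);
;;     return x;
;;   }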

;; The fcadd and fcmla patterns are kept as UNSPECs because their use has to
;; guarantee that the source vectors are contiguous.  It would be wrong to
;; describe the operation without being able to describe the permute that is
;; also required, but even if that is done the permute would have been
;; created as a LOAD_LANES, which means the values in the registers are in
;; the wrong order.
(define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_expand "cadd<rot><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                       (match_operand:VHSDF 2 "register_operand")]
                      FCADD))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
)

(define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")]
                                  FCMLA)
                    (match_operand:VHSDF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                                  FCMLA)
                    (match_operand:VHSDF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
        (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
                                 (match_operand:V8HF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                FCMLA)
                   (match_operand:V4HF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
        (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
                                     (match_operand:<VHALF> 3 "register_operand" "w")
                                     (match_operand:SI 4 "const_int_operand" "n")]
                                    FCMLA)
                     (match_operand:VQ_HSF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:VHSDF 0 "register_operand")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                                   (match_operand:VHSDF 2 "register_operand")]
                                  FCMLA_OP)
                    (match_operand:VHSDF 3 "register_operand")))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
                                                 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
                                                 operands[2], operands[1]));
  DONE;
})

;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                       (match_operand:VHSDF 2 "register_operand")]
                      FCMUL_OP))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx res1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
                                                 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
                                                 operands[2], operands[1]));
  DONE;
})

;; These expands map to the Dot Product optab the vectorizer checks for
;; and to the intrinsics pattern.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
                      (match_operand:<VSI2QI> 2 "register_operand" "w")]
                     DOTPROD)
          (match_operand:VS 3 "register_operand" "0")))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
;; (vector) Dot Product operation and the vectorized optab.
(define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
                      (match_operand:<VSI2QI> 2 "register_operand" "w")]
                     UNSPEC_USDOT)
          (match_operand:VS 3 "register_operand" "0")))]
  "TARGET_I8MM"
  "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                      (match_operand:V8QI 3 "register_operand" "<h_con>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     DOTPROD)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                      (match_operand:V16QI 3 "register_operand" "<h_con>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     DOTPROD)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
                      (match_operand:VB 3 "register_operand" "w")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     DOTPROD_I8MM)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
{
  int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
  return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<VS:q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)
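
;; A scalar C model of the BSL-based selection above (illustrative sketch):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   double copysign_d (double x, double y)
;;   {
;;     uint64_t xi, yi, m = 1ULL << 63;
;;     memcpy (&xi, &x, 8);
;;     memcpy (&yi, &y, 8);
;;     xi = (yi & m) | (xi & ~m);  /* Bitwise select on the sign bit.  */
;;     memcpy (&x, &xi, 8);
;;     return x;
;;   }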

(define_insn "mul_lane<mode>3"
  [(set (match_operand:VMULD 0 "register_operand" "=w")
        (mult:VMULD
          (vec_duplicate:VMULD
            (vec_select:<VEL>
              (match_operand:<VCOND> 2 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
          (match_operand:VMULD 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
  return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "mul_laneq<mode>3"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 3 "immediate_operand")])))
          (match_operand:VMUL 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
  return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "mul_n<mode>3"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 2 "register_operand" "<h_con>"))
          (match_operand:VMUL 1 "register_operand" "w")))]
  "TARGET_SIMD"
811 "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "aarch64_ursqrte<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                       UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
  return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
}
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode><vczle><vczbe>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI
          (USMAX:VDQ_BHSI
            (match_operand:VDQ_BHSI 1 "register_operand" "w")
            (match_operand:VDQ_BHSI 2 "register_operand" "w"))
          (<max_opp>:VDQ_BHSI
            (match_dup 1)
            (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_expand "<su>abd<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (USMAX:VDQ_BHSI
     (match_operand:VDQ_BHSI 1 "register_operand")
     (match_operand:VDQ_BHSI 2 "register_operand"))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
  DONE;
}
)

(define_insn "aarch64_<su>abdl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (zero_extend:<VWIDE>
          (minus:VD_BHSI
            (USMAX:VD_BHSI
              (match_operand:VD_BHSI 1 "register_operand" "w")
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (<max_opp>:VD_BHSI
              (match_dup 1)
              (match_dup 2)))))]
  "TARGET_SIMD"
  "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<su>abdl2<mode>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (zero_extend:<VDBLW>
          (minus:<VHALF>
            (USMAX:<VHALF>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3)))
            (<max_opp>:<VHALF>
              (vec_select:<VHALF>
                (match_dup 1)
                (match_dup 3))
              (vec_select:<VHALF>
                (match_dup 2)
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_expand "aarch64_<su>abdl2<mode>"
  [(match_operand:<VDBLW> 0 "register_operand")
   (USMAX:VQW
     (match_operand:VQW 1 "register_operand")
     (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
                                               operands[2], hi));
  DONE;
}
)

(define_insn "aarch64_<su>abdl<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (abs:<VWIDE>
          (minus:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd_long")]
)

(define_insn "aarch64_<su>abdl<mode>_lo_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (abs:<VWIDE>
          (minus:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_abd_long")]
)

(define_expand "vec_widen_<su>abd_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
                                                     operands[2], p));
  DONE;
}
)

(define_expand "vec_widen_<su>abd_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
                                                     operands[2], p));
  DONE;
}
)

(define_insn "aarch64_<su>abal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (zero_extend:<VWIDE>
            (minus:VD_BHSI
              (USMAX:VD_BHSI
                (match_operand:VD_BHSI 2 "register_operand" "w")
                (match_operand:VD_BHSI 3 "register_operand" "w"))
              (<max_opp>:VD_BHSI
                (match_dup 2)
                (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<su>abal2<mode>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (plus:<VDBLW>
          (zero_extend:<VDBLW>
            (minus:<VHALF>
              (USMAX:<VHALF>
                (vec_select:<VHALF>
                  (match_operand:VQW 2 "register_operand" "w")
                  (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
                (vec_select:<VHALF>
                  (match_operand:VQW 3 "register_operand" "w")
                  (match_dup 4)))
              (<max_opp>:<VHALF>
                (vec_select:<VHALF>
                  (match_dup 2)
                  (match_dup 4))
                (vec_select:<VHALF>
                  (match_dup 3)
                  (match_dup 4)))))
          (match_operand:<VDBLW> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_expand "aarch64_<su>abal2<mode>"
  [(match_operand:<VDBLW> 0 "register_operand")
   (match_operand:<VDBLW> 1 "register_operand")
   (USMAX:VQW
     (match_operand:VQW 2 "register_operand")
     (match_operand:VQW 3 "register_operand"))]
  "TARGET_SIMD"
{
  rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
                                               operands[2], operands[3], hi));
  DONE;
}
)

(define_expand "aarch64_<su>adalp<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand")
        (plus:<VDBLW>
          (plus:<VDBLW>
            (vec_select:<VDBLW>
              (ANY_EXTEND:<V2XWIDE>
                (match_operand:VDQV_L 2 "register_operand"))
              (match_dup 3))
            (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
                                (match_dup 4)))
          (match_operand:<VDBLW> 1 "register_operand")))]
  "TARGET_SIMD"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
  operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
  operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
}
)

(define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (plus:<VDBLW>
          (plus:<VDBLW>
            (vec_select:<VDBLW>
              (ANY_EXTEND:<V2XWIDE>
                (match_operand:VDQV_L 2 "register_operand" "w"))
              (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
            (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
                                (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
          (match_operand:<VDBLW> 1 "register_operand" "0")))]
  "TARGET_SIMD
   && !rtx_equal_p (operands[3], operands[4])"
  "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.8b, op2.8b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD	tmp2.16b, op1.16b, op2.16b
;; UDOT	op3.4s, tmp2.16b, tmp1.16b
;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.
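;;
;; This implements the vectorizer's SAD_EXPR, i.e. (illustrative) loops like:
;;
;;   unsigned sad (const uint8_t *a, const uint8_t *b, int n)
;;   {
;;     unsigned sum = 0;
;;     for (int i = 0; i < n; i++)
;;       sum += abs (a[i] - b[i]);	/* Subtraction is done in int.  */
;;     return sum;
;;   }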

(define_expand "<su>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
                (match_operand:V16QI 2 "register_operand"))
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
{
  if (TARGET_DOTPROD)
    {
      rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
      rtx abd = gen_reg_rtx (V16QImode);
      emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
      emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
      DONE;
    }
  rtx reduc = gen_reg_rtx (V8HImode);
  emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
                                         operands[2]));
  emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
                                       gen_lowpart (V8QImode, operands[1]),
                                       gen_lowpart (V8QImode,
                                                    operands[2])));
  emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
  emit_move_insn (operands[0], operands[3]);
  DONE;
}
)

(define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (minus:VDQ_BHSI
                         (USMAX:VDQ_BHSI
                           (match_operand:VDQ_BHSI 2 "register_operand" "w")
                           (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                         (<max_opp>:VDQ_BHSI
                           (match_dup 2)
                           (match_dup 3)))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  "@
   and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
   * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
                                               AARCH64_CHECK_BIC);"
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  "@
   orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
   * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
                                               AARCH64_CHECK_ORR);"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
          (match_operand:VALL_F16 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  switch (which_alternative)
    {
    case 0:
      return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
    case 1:
      return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
    case 2:
      return "ld1\\t{%0.<Vetype>}[%p2], %1";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "aarch64_simd_vec_set_zero<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
          (match_operand:VALL_F16 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
}
)

(define_insn "@aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                         INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})
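
;; A scalar C model of the shift-based signbit above (illustrative sketch):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   int32_t signbit_f (float x)
;;   {
;;     uint32_t xi;
;;     memcpy (&xi, &x, 4);
;;     return xi >> 31;	/* USHR by (element bitsize - 1).  */
;;   }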

(define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))]
  "TARGET_SIMD"
  "@
   cmlt\t%0.<Vtype>, %1.<Vtype>, #0
   sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
)

(define_insn "aarch64_<sra_op>sra_n<mode>_insn"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 2 "register_operand" "w")
            (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
          (match_operand:VDQ_I 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (plus:VSDQ_I_DI
          (truncate:VSDQ_I_DI
            (SHIFTRT:<V2XWIDE>
              (plus:<V2XWIDE>
                (<SHIFTEXTEND>:<V2XWIDE>
                  (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
          (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_expand "aarch64_<sra_op>sra_n<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 2 "register_operand")
            (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
          (match_operand:VDQ_I 1 "register_operand")))]
  "TARGET_SIMD"
{
  operands[3]
    = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
}
)

(define_expand "aarch64_<sra_op>rsra_n<mode>"
  [(match_operand:VSDQ_I_DI 0 "register_operand")
   (match_operand:VSDQ_I_DI 1 "register_operand")
   (SHIFTRT:VSDQ_I_DI
     (match_operand:VSDQ_I_DI 2 "register_operand")
     (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
  "TARGET_SIMD"
{
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
  rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  if (VECTOR_MODE_P (<MODE>mode))
    {
      shft = gen_const_vec_duplicate (<MODE>mode, shft);
      rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
    }

  emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
                                                    operands[2], shft, rnd));
  DONE;
}
)
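
;; As a worked example (illustrative), an unsigned rounding shift right by 2
;; of the element value 7 computes (7 + (1 << 1)) >> 2 = 9 >> 2 = 2, whereas
;; a plain USHR would give 7 >> 2 = 1; adding 1 << (shift - 1) before the
;; shift is what implements round-to-nearest.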

(define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
                                                               operands[2],
                                                               0)));
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
  DONE;
})

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                         convert_to_mode (<VEL>mode, tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      tmp1));
  DONE;
})
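
;; Advanced SIMD has no right-shift-by-register instruction: USHL/SSHL shift
;; left for positive shift elements and right for negative ones.  The
;; variable-shift expanders above and below therefore negate the shift count
;; first, roughly (illustrative; ushl here models the USHL instruction's
;; behaviour for negative counts):
;;
;;   uint32_t lshr_var (uint32_t x, int n) { return ushl (x, -n); }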
1541
1542 (define_expand "ashr<mode>3"
1543 [(match_operand:VDQ_I 0 "register_operand")
1544 (match_operand:VDQ_I 1 "register_operand")
1545 (match_operand:SI 2 "general_operand")]
1546 "TARGET_SIMD"
1547 {
1548 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1549 int shift_amount;
1550
1551 if (CONST_INT_P (operands[2]))
1552 {
1553 shift_amount = INTVAL (operands[2]);
1554 if (shift_amount > 0 && shift_amount <= bit_width)
1555 {
1556 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1557 shift_amount);
1558 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1559 operands[1],
1560 tmp));
1561 DONE;
1562 }
1563 }
1564
1565 operands[2] = force_reg (SImode, operands[2]);
1566
1567 rtx tmp = gen_reg_rtx (SImode);
1568 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1569 emit_insn (gen_negsi2 (tmp, operands[2]));
1570 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1571 tmp, 0)));
1572 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1573 tmp1));
1574 DONE;
1575 })
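;; Advanced SIMD has no right-shift-by-register instruction, so the two
;; expanders above negate the shift amount and shift left by a negative
;; count instead. A minimal C sketch of the same trick for V4SI, assuming
;; <arm_neon.h>:
;;   uint32x4_t lshr_reg (uint32x4_t x, int n)
;;   { return vshlq_u32 (x, vdupq_n_s32 (-n)); } /* neg + ushl */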
1576
1577 (define_expand "vashl<mode>3"
1578 [(match_operand:VDQ_I 0 "register_operand")
1579 (match_operand:VDQ_I 1 "register_operand")
1580 (match_operand:VDQ_I 2 "register_operand")]
1581 "TARGET_SIMD"
1582 {
1583 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1584 operands[2]));
1585 DONE;
1586 })
1587
1588 (define_expand "vashr<mode>3"
1589 [(match_operand:VDQ_I 0 "register_operand")
1590 (match_operand:VDQ_I 1 "register_operand")
1591 (match_operand:VDQ_I 2 "register_operand")]
1592 "TARGET_SIMD"
1593 {
1594 rtx neg = gen_reg_rtx (<MODE>mode);
1595 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1596 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1597 neg));
1598 DONE;
1599 })
1600
1601 ;; DI vector shift
1602 (define_expand "aarch64_ashr_simddi"
1603 [(match_operand:DI 0 "register_operand")
1604 (match_operand:DI 1 "register_operand")
1605 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1606 "TARGET_SIMD"
1607 {
1608 /* An arithmetic shift right by 64 fills the result with copies of the sign
1609 bit, just like an asr by 63; however, the standard pattern does not
1610 handle a shift by 64. */
1611 if (INTVAL (operands[2]) == 64)
1612 operands[2] = GEN_INT (63);
1613 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1614 DONE;
1615 }
1616 )
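;; For example, for any int64_t x, an arithmetic shift right by 63 already
;; yields 0 for non-negative x and -1 for negative x, so clamping the
;; shift amount from 64 to 63 above does not change the result.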
1617
1618 (define_expand "vlshr<mode>3"
1619 [(match_operand:VDQ_I 0 "register_operand")
1620 (match_operand:VDQ_I 1 "register_operand")
1621 (match_operand:VDQ_I 2 "register_operand")]
1622 "TARGET_SIMD"
1623 {
1624 rtx neg = gen_reg_rtx (<MODE>mode);
1625 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1626 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1627 neg));
1628 DONE;
1629 })
1630
1631 (define_expand "aarch64_lshr_simddi"
1632 [(match_operand:DI 0 "register_operand")
1633 (match_operand:DI 1 "register_operand")
1634 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1635 "TARGET_SIMD"
1636 {
1637 if (INTVAL (operands[2]) == 64)
1638 emit_move_insn (operands[0], const0_rtx);
1639 else
1640 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1641 DONE;
1642 }
1643 )
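;; In C terms the semantics implemented here are, loosely,
;; "n == 64 ? 0 : x >> n" for a uint64_t x: a shift by 64 is folded to a
;; move of zero rather than being given to the standard pattern, which
;; does not handle it.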
1644
1645 ;; For 64-bit modes we use shl/ushr, as this does not require a SIMD zero.
1646 (define_insn "vec_shr_<mode><vczle><vczbe>"
1647 [(set (match_operand:VD 0 "register_operand" "=w")
1648 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1649 (match_operand:SI 2 "immediate_operand" "i")]
1650 UNSPEC_VEC_SHR))]
1651 "TARGET_SIMD"
1652 {
1653 if (BYTES_BIG_ENDIAN)
1654 return "shl %d0, %d1, %2";
1655 else
1656 return "ushr %d0, %d1, %2";
1657 }
1658 [(set_attr "type" "neon_shift_imm")]
1659 )
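;; On little-endian, element 0 lives in the least significant bits of the
;; D register, so a bitwise ushr of the whole register moves the
;; higher-numbered elements towards element 0 and zero-fills the top; on
;; big-endian the in-register lane order is reversed, so shl performs the
;; same element-wise move.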
1660
1661 (define_expand "vec_set<mode>"
1662 [(match_operand:VALL_F16 0 "register_operand")
1663 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1664 (match_operand:SI 2 "immediate_operand")]
1665 "TARGET_SIMD"
1666 {
1667 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1668 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1669 GEN_INT (elem), operands[0]));
1670 DONE;
1671 }
1672 )
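;; Note that the immediate built above is a one-hot merge mask rather
;; than a lane number: inserting into lane 2, say, passes (1 << 2) = 4,
;; and the remaining lanes keep the original contents of operand 0.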
1673
1674
1675 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1676 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1677 (plus:VDQ_BHSI (mult:VDQ_BHSI
1678 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1679 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1680 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1681 "TARGET_SIMD"
1682 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1683 [(set_attr "type" "neon_mla_<Vetype><q>")]
1684 )
1685
1686 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1687 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1688 (plus:VDQHS
1689 (mult:VDQHS
1690 (vec_duplicate:VDQHS
1691 (vec_select:<VEL>
1692 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1693 (parallel [(match_operand:SI 2 "immediate_operand")])))
1694 (match_operand:VDQHS 3 "register_operand" "w"))
1695 (match_operand:VDQHS 4 "register_operand" "0")))]
1696 "TARGET_SIMD"
1697 {
1698 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1699 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1700 }
1701 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1702 )
1703
1704 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1705 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1706 (plus:VDQHS
1707 (mult:VDQHS
1708 (vec_duplicate:VDQHS
1709 (vec_select:<VEL>
1710 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1711 (parallel [(match_operand:SI 2 "immediate_operand")])))
1712 (match_operand:VDQHS 3 "register_operand" "w"))
1713 (match_operand:VDQHS 4 "register_operand" "0")))]
1714 "TARGET_SIMD"
1715 {
1716 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1717 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1718 }
1719 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1720 )
1721
1722 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1723 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1724 (plus:VDQHS
1725 (mult:VDQHS
1726 (vec_duplicate:VDQHS
1727 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1728 (match_operand:VDQHS 2 "register_operand" "w"))
1729 (match_operand:VDQHS 1 "register_operand" "0")))]
1730 "TARGET_SIMD"
1731 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1732 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1733 )
1734
1735 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1736 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1737 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1738 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1739 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1740 "TARGET_SIMD"
1741 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1742 [(set_attr "type" "neon_mla_<Vetype><q>")]
1743 )
1744
1745 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1746 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1747 (minus:VDQHS
1748 (match_operand:VDQHS 4 "register_operand" "0")
1749 (mult:VDQHS
1750 (vec_duplicate:VDQHS
1751 (vec_select:<VEL>
1752 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1753 (parallel [(match_operand:SI 2 "immediate_operand")])))
1754 (match_operand:VDQHS 3 "register_operand" "w"))))]
1755 "TARGET_SIMD"
1756 {
1757 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1758 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1759 }
1760 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1761 )
1762
1763 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1764 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1765 (minus:VDQHS
1766 (match_operand:VDQHS 4 "register_operand" "0")
1767 (mult:VDQHS
1768 (vec_duplicate:VDQHS
1769 (vec_select:<VEL>
1770 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1771 (parallel [(match_operand:SI 2 "immediate_operand")])))
1772 (match_operand:VDQHS 3 "register_operand" "w"))))]
1773 "TARGET_SIMD"
1774 {
1775 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1776 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1777 }
1778 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1779 )
1780
1781 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1782 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1783 (minus:VDQHS
1784 (match_operand:VDQHS 1 "register_operand" "0")
1785 (mult:VDQHS
1786 (vec_duplicate:VDQHS
1787 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1788 (match_operand:VDQHS 2 "register_operand" "w"))))]
1789 "TARGET_SIMD"
1790 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1791 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1792 )
1793
1794 ;; Max/Min operations.
1795 (define_insn "<su><maxmin><mode>3<vczle><vczbe>"
1796 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1797 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1798 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1799 "TARGET_SIMD"
1800 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1801 [(set_attr "type" "neon_minmax<q>")]
1802 )
1803
1804 (define_expand "<su><maxmin>v2di3"
1805 [(set (match_operand:V2DI 0 "register_operand")
1806 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1807 (match_operand:V2DI 2 "register_operand")))]
1808 "TARGET_SIMD"
1809 {
1810 enum rtx_code cmp_operator;
1811 rtx cmp_fmt;
1812
1813 switch (<CODE>)
1814 {
1815 case UMIN:
1816 cmp_operator = LTU;
1817 break;
1818 case SMIN:
1819 cmp_operator = LT;
1820 break;
1821 case UMAX:
1822 cmp_operator = GTU;
1823 break;
1824 case SMAX:
1825 cmp_operator = GT;
1826 break;
1827 default:
1828 gcc_unreachable ();
1829 }
1830
1831 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1832 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1833 operands[2], cmp_fmt, operands[1], operands[2]));
1834 DONE;
1835 })
1836
1837 ;; Pairwise Integer Max/Min operations.
1838 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1839 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1840 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1841 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1842 MAXMINV))]
1843 "TARGET_SIMD"
1844 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1845 [(set_attr "type" "neon_minmax<q>")]
1846 )
1847
1848 ;; Pairwise FP Max/Min operations.
1849 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1850 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1852 (match_operand:VHSDF 2 "register_operand" "w")]
1853 FMAXMINV))]
1854 "TARGET_SIMD"
1855 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1856 [(set_attr "type" "neon_minmax<q>")]
1857 )
1858
1859 ;; vec_concat gives a new vector with the low elements from operand 1 and
1860 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1861 ;; and op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1862 ;; This means that the RTL descriptions of the patterns below need to
1863 ;; change depending on endianness.
1864
1865 ;; Narrowing operations.
1866
1867 (define_insn "aarch64_xtn2<mode>_insn_le"
1868 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1869 (vec_concat:<VNARROWQ2>
1870 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1871 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1872 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1873 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1874 [(set_attr "type" "neon_move_narrow_q")]
1875 )
1876
1877 (define_insn "aarch64_xtn2<mode>_insn_be"
1878 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1879 (vec_concat:<VNARROWQ2>
1880 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1881 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1882 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1883 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1884 [(set_attr "type" "neon_move_narrow_q")]
1885 )
1886
1887 (define_expand "aarch64_xtn2<mode>"
1888 [(match_operand:<VNARROWQ2> 0 "register_operand")
1889 (match_operand:<VNARROWQ> 1 "register_operand")
1890 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1891 "TARGET_SIMD"
1892 {
1893 if (BYTES_BIG_ENDIAN)
1894 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1895 operands[2]));
1896 else
1897 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1898 operands[2]));
1899 DONE;
1900 }
1901 )
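;; At the intrinsics level this corresponds to vmovn_high (a minimal
;; sketch, assuming <arm_neon.h> and V4SI -> V8HI):
;;   int16x8_t narrow_hi (int16x4_t lo, int32x4_t x)
;;   { return vmovn_high_s32 (lo, x); } /* xtn2 v0.8h, v1.4s */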
1902
1903 (define_insn "*aarch64_narrow_trunc<mode>"
1904 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1905 (vec_concat:<VNARROWQ2>
1906 (truncate:<VNARROWQ>
1907 (match_operand:VQN 1 "register_operand" "w"))
1908 (truncate:<VNARROWQ>
1909 (match_operand:VQN 2 "register_operand" "w"))))]
1910 "TARGET_SIMD"
1911 {
1912 if (!BYTES_BIG_ENDIAN)
1913 return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1914 else
1915 return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1916 }
1917 [(set_attr "type" "neon_permute<q>")]
1918 )
1919
1920 ;; Packing doubles.
1921
1922 (define_expand "vec_pack_trunc_<mode>"
1923 [(match_operand:<VNARROWD> 0 "register_operand")
1924 (match_operand:VDN 1 "general_operand")
1925 (match_operand:VDN 2 "general_operand")]
1926 "TARGET_SIMD"
1927 {
1928 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1929 emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
1930 emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1931 DONE;
1932 })
1933
1934 ;; Packing quads.
1935
1936 (define_expand "vec_pack_trunc_<mode>"
1937 [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1938 (vec_concat:<VNARROWQ2>
1939 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1940 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1941 "TARGET_SIMD"
1942 {
1943 rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1944 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1945 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1946
1947 emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1948
1949 if (BYTES_BIG_ENDIAN)
1950 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1951 operands[hi]));
1952 else
1953 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1954 operands[hi]));
1955 DONE;
1956 }
1957 )
1958
1959 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1960 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1961 (vec_concat:<VNARROWQ2>
1962 (truncate:<VNARROWQ>
1963 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1964 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1965 (truncate:<VNARROWQ>
1966 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1967 (match_dup 2)))))]
1968 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1969 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1970 [(set_attr "type" "neon_permute<q>")]
1971 )
1972
1973 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1974 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1975 (vec_concat:<VNARROWQ2>
1976 (truncate:<VNARROWQ>
1977 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1978 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1979 (truncate:<VNARROWQ>
1980 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1981 (match_dup 2)))))]
1982 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1983 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1984 [(set_attr "type" "neon_permute<q>")]
1985 )
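;; The two patterns above rely on the fact that truncating after a right
;; shift by exactly half the element width selects the top half of each
;; element, which is what uzp2 extracts when the inputs are reinterpreted
;; at the narrow width; e.g. for V4SI inputs, "(int16_t) (x >> 16)" per
;; element becomes a single uzp2 on the .8h views of the two registers.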
1986
1987 ;; Widening operations.
1988
1989 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1990 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1991 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1992 (match_operand:VQW 1 "register_operand" "w")
1993 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1994 )))]
1995 "TARGET_SIMD"
1996 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1997 [(set_attr "type" "neon_shift_imm_long")]
1998 )
1999
2000 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
2001 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2002 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2003 (match_operand:VQW 1 "register_operand" "w")
2004 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
2005 )))]
2006 "TARGET_SIMD"
2007 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
2008 [(set_attr "type" "neon_shift_imm_long")]
2009 )
2010
2011 (define_expand "vec_unpack<su>_hi_<mode>"
2012 [(match_operand:<VWIDE> 0 "register_operand")
2013 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2014 "TARGET_SIMD"
2015 {
2016 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2017 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
2018 operands[1], p));
2019 DONE;
2020 }
2021 )
2022
2023 (define_expand "vec_unpack<su>_lo_<mode>"
2024 [(match_operand:<VWIDE> 0 "register_operand")
2025 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2026 "TARGET_SIMD"
2027 {
2028 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2029 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
2030 operands[1], p));
2031 DONE;
2032 }
2033 )
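;; A minimal intrinsics-level sketch of the lo/hi unpacks, assuming
;; <arm_neon.h> and V8HI -> V4SI:
;;   int32x4_t unpack_lo (int16x8_t x)
;;   { return vmovl_s16 (vget_low_s16 (x)); } /* sxtl  */
;;   int32x4_t unpack_hi (int16x8_t x)
;;   { return vmovl_high_s16 (x); }           /* sxtl2 */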
2034
2035 ;; Widening arithmetic.
2036
2037 (define_insn "*aarch64_<su>mlal_lo<mode>"
2038 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2039 (plus:<VWIDE>
2040 (mult:<VWIDE>
2041 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2042 (match_operand:VQW 2 "register_operand" "w")
2043 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2044 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2045 (match_operand:VQW 4 "register_operand" "w")
2046 (match_dup 3))))
2047 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2048 "TARGET_SIMD"
2049 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2050 [(set_attr "type" "neon_mla_<Vetype>_long")]
2051 )
2052
2053 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2054 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2055 (plus:<VWIDE>
2056 (mult:<VWIDE>
2057 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2058 (match_operand:VQW 2 "register_operand" "w")
2059 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2060 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2061 (match_operand:VQW 4 "register_operand" "w")
2062 (match_dup 3))))
2063 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2064 "TARGET_SIMD"
2065 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2066 [(set_attr "type" "neon_mla_<Vetype>_long")]
2067 )
2068
2069 (define_expand "aarch64_<su>mlal_hi<mode>"
2070 [(match_operand:<VWIDE> 0 "register_operand")
2071 (match_operand:<VWIDE> 1 "register_operand")
2072 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
2073 (match_operand:VQW 3 "register_operand")]
2074 "TARGET_SIMD"
2075 {
2076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2077 emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2078 operands[2], p, operands[3]));
2079 DONE;
2080 }
2081 )
2082
2083 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2084 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2085 (plus:<VWIDE>
2086 (mult:<VWIDE>
2087 (ANY_EXTEND:<VWIDE>
2088 (vec_select:<VHALF>
2089 (match_operand:VQ_HSI 2 "register_operand" "w")
2090 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2091 (vec_duplicate:<VWIDE>
2092 (ANY_EXTEND:<VWIDE_S>
2093 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2094 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2095 "TARGET_SIMD"
2096 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2097 [(set_attr "type" "neon_mla_<Vetype>_long")]
2098 )
2099
2100 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2101 [(match_operand:<VWIDE> 0 "register_operand")
2102 (match_operand:<VWIDE> 1 "register_operand")
2103 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2104 (match_operand:<VEL> 3 "register_operand")]
2105 "TARGET_SIMD"
2106 {
2107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108 emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2109 operands[1], operands[2], p, operands[3]));
2110 DONE;
2111 }
2112 )
2113
2114 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2115 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2116 (minus:<VWIDE>
2117 (match_operand:<VWIDE> 1 "register_operand" "0")
2118 (mult:<VWIDE>
2119 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2120 (match_operand:VQW 2 "register_operand" "w")
2121 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2122 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2123 (match_operand:VQW 4 "register_operand" "w")
2124 (match_dup 3))))))]
2125 "TARGET_SIMD"
2126 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2127 [(set_attr "type" "neon_mla_<Vetype>_long")]
2128 )
2129
2130 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2131 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2132 (minus:<VWIDE>
2133 (match_operand:<VWIDE> 1 "register_operand" "0")
2134 (mult:<VWIDE>
2135 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2136 (match_operand:VQW 2 "register_operand" "w")
2137 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2138 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2139 (match_operand:VQW 4 "register_operand" "w")
2140 (match_dup 3))))))]
2141 "TARGET_SIMD"
2142 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2143 [(set_attr "type" "neon_mla_<Vetype>_long")]
2144 )
2145
2146 (define_expand "aarch64_<su>mlsl_hi<mode>"
2147 [(match_operand:<VWIDE> 0 "register_operand")
2148 (match_operand:<VWIDE> 1 "register_operand")
2149 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
2150 (match_operand:VQW 3 "register_operand")]
2151 "TARGET_SIMD"
2152 {
2153 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2154 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2155 operands[2], p, operands[3]));
2156 DONE;
2157 }
2158 )
2159
2160 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2161 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2162 (minus:<VWIDE>
2163 (match_operand:<VWIDE> 1 "register_operand" "0")
2164 (mult:<VWIDE>
2165 (ANY_EXTEND:<VWIDE>
2166 (vec_select:<VHALF>
2167 (match_operand:VQ_HSI 2 "register_operand" "w")
2168 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2169 (vec_duplicate:<VWIDE>
2170 (ANY_EXTEND:<VWIDE_S>
2171 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2172 "TARGET_SIMD"
2173 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2174 [(set_attr "type" "neon_mla_<Vetype>_long")]
2175 )
2176
2177 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2178 [(match_operand:<VWIDE> 0 "register_operand")
2179 (match_operand:<VWIDE> 1 "register_operand")
2180 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2181 (match_operand:<VEL> 3 "register_operand")]
2182 "TARGET_SIMD"
2183 {
2184 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2185 emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2186 operands[1], operands[2], p, operands[3]));
2187 DONE;
2188 }
2189 )
2190
2191 (define_insn "aarch64_<su>mlal<mode>"
2192 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2193 (plus:<VWIDE>
2194 (mult:<VWIDE>
2195 (ANY_EXTEND:<VWIDE>
2196 (match_operand:VD_BHSI 2 "register_operand" "w"))
2197 (ANY_EXTEND:<VWIDE>
2198 (match_operand:VD_BHSI 3 "register_operand" "w")))
2199 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2200 "TARGET_SIMD"
2201 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2202 [(set_attr "type" "neon_mla_<Vetype>_long")]
2203 )
2204
2205 (define_insn "aarch64_<su>mlal_n<mode>"
2206 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2207 (plus:<VWIDE>
2208 (mult:<VWIDE>
2209 (ANY_EXTEND:<VWIDE>
2210 (match_operand:VD_HSI 2 "register_operand" "w"))
2211 (vec_duplicate:<VWIDE>
2212 (ANY_EXTEND:<VWIDE_S>
2213 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2214 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2215 "TARGET_SIMD"
2216 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2217 [(set_attr "type" "neon_mla_<Vetype>_long")]
2218 )
2219
2220 (define_insn "aarch64_<su>mlsl<mode>"
2221 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2222 (minus:<VWIDE>
2223 (match_operand:<VWIDE> 1 "register_operand" "0")
2224 (mult:<VWIDE>
2225 (ANY_EXTEND:<VWIDE>
2226 (match_operand:VD_BHSI 2 "register_operand" "w"))
2227 (ANY_EXTEND:<VWIDE>
2228 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2229 "TARGET_SIMD"
2230 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2231 [(set_attr "type" "neon_mla_<Vetype>_long")]
2232 )
2233
2234 (define_insn "aarch64_<su>mlsl_n<mode>"
2235 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2236 (minus:<VWIDE>
2237 (match_operand:<VWIDE> 1 "register_operand" "0")
2238 (mult:<VWIDE>
2239 (ANY_EXTEND:<VWIDE>
2240 (match_operand:VD_HSI 2 "register_operand" "w"))
2241 (vec_duplicate:<VWIDE>
2242 (ANY_EXTEND:<VWIDE_S>
2243 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2244 "TARGET_SIMD"
2245 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2246 [(set_attr "type" "neon_mla_<Vetype>_long")]
2247 )
2248
2249 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2250 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2251 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2252 (match_operand:VQW 1 "register_operand" "w")
2253 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2254 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2255 (match_operand:VQW 2 "register_operand" "w")
2256 (match_dup 3)))))]
2257 "TARGET_SIMD"
2258 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2259 [(set_attr "type" "neon_mul_<Vetype>_long")]
2260 )
2261
2262 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2264 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2265 (match_operand:VD_BHSI 1 "register_operand" "w"))
2266 (ANY_EXTEND:<VWIDE>
2267 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2268 "TARGET_SIMD"
2269 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2270 [(set_attr "type" "neon_mul_<Vetype>_long")]
2271 )
2272
2273 (define_expand "vec_widen_<su>mult_lo_<mode>"
2274 [(match_operand:<VWIDE> 0 "register_operand")
2275 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2276 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2277 "TARGET_SIMD"
2278 {
2279 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2280 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2281 operands[1],
2282 operands[2], p));
2283 DONE;
2284 }
2285 )
2286
2287 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2289 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2290 (match_operand:VQW 1 "register_operand" "w")
2291 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2292 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2293 (match_operand:VQW 2 "register_operand" "w")
2294 (match_dup 3)))))]
2295 "TARGET_SIMD"
2296 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2297 [(set_attr "type" "neon_mul_<Vetype>_long")]
2298 )
2299
2300 (define_expand "vec_widen_<su>mult_hi_<mode>"
2301 [(match_operand:<VWIDE> 0 "register_operand")
2302 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2303 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2304 "TARGET_SIMD"
2305 {
2306 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2307 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2308 operands[1],
2309 operands[2], p));
2310 DONE;
2312 }
2313 )
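;; A minimal intrinsics-level sketch of the widening multiplies above,
;; assuming <arm_neon.h> and V8HI inputs:
;;   int32x4_t wmul_lo (int16x8_t a, int16x8_t b)
;;   { return vmull_s16 (vget_low_s16 (a), vget_low_s16 (b)); } /* smull  */
;;   int32x4_t wmul_hi (int16x8_t a, int16x8_t b)
;;   { return vmull_high_s16 (a, b); }                          /* smull2 */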
2314
2315 ;; vmull_lane_s16 intrinsics
2316 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2317 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2318 (mult:<VWIDE>
2319 (ANY_EXTEND:<VWIDE>
2320 (match_operand:<VCOND> 1 "register_operand" "w"))
2321 (vec_duplicate:<VWIDE>
2322 (ANY_EXTEND:<VWIDE_S>
2323 (vec_select:<VEL>
2324 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2325 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2326 "TARGET_SIMD"
2327 {
2328 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2329 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2330 }
2331 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2332 )
2333
2334 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2335 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2336 (mult:<VWIDE>
2337 (ANY_EXTEND:<VWIDE>
2338 (vec_select:<VHALF>
2339 (match_operand:VQ_HSI 1 "register_operand" "w")
2340 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2341 (vec_duplicate:<VWIDE>
2342 (ANY_EXTEND:<VWIDE_S>
2343 (vec_select:<VEL>
2344 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2345 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2346 "TARGET_SIMD"
2347 {
2348 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2349 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2350 }
2351 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2352 )
2353
2354 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2355 [(match_operand:<VWIDE> 0 "register_operand")
2356 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2357 (match_operand:<VCOND> 2 "register_operand")
2358 (match_operand:SI 3 "immediate_operand")]
2359 "TARGET_SIMD"
2360 {
2361 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2362 emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2363 operands[1], p, operands[2], operands[3]));
2364 DONE;
2365 }
2366 )
2367
2368 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2369 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2370 (mult:<VWIDE>
2371 (ANY_EXTEND:<VWIDE>
2372 (vec_select:<VHALF>
2373 (match_operand:VQ_HSI 1 "register_operand" "w")
2374 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2375 (vec_duplicate:<VWIDE>
2376 (ANY_EXTEND:<VWIDE_S>
2377 (vec_select:<VEL>
2378 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2379 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2380 "TARGET_SIMD"
2381 {
2382 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2383 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2384 }
2385 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2386 )
2387
2388 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2389 [(match_operand:<VWIDE> 0 "register_operand")
2390 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2391 (match_operand:<VCONQ> 2 "register_operand")
2392 (match_operand:SI 3 "immediate_operand")]
2393 "TARGET_SIMD"
2394 {
2395 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2396 emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2397 operands[1], p, operands[2], operands[3]));
2398 DONE;
2399 }
2400 )
2401
2402 (define_insn "aarch64_<su>mull_n<mode>"
2403 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2404 (mult:<VWIDE>
2405 (ANY_EXTEND:<VWIDE>
2406 (match_operand:VD_HSI 1 "register_operand" "w"))
2407 (vec_duplicate:<VWIDE>
2408 (ANY_EXTEND:<VWIDE_S>
2409 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2410 "TARGET_SIMD"
2411 "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2412 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2413 )
2414
2415 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2416 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2417 (mult:<VWIDE>
2418 (ANY_EXTEND:<VWIDE>
2419 (vec_select:<VHALF>
2420 (match_operand:VQ_HSI 1 "register_operand" "w")
2421 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2422 (vec_duplicate:<VWIDE>
2423 (ANY_EXTEND:<VWIDE_S>
2424 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2425 "TARGET_SIMD"
2426 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2427 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2428 )
2429
2430 (define_expand "aarch64_<su>mull_hi_n<mode>"
2431 [(match_operand:<VWIDE> 0 "register_operand")
2432 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2433 (match_operand:<VEL> 2 "register_operand")]
2434 "TARGET_SIMD"
2435 {
2436 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2437 emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2438 operands[2], p));
2439 DONE;
2440 }
2441 )
2442
2443 ;; vmlal_lane_s16 intrinsics
2444 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2445 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2446 (plus:<VWIDE>
2447 (mult:<VWIDE>
2448 (ANY_EXTEND:<VWIDE>
2449 (match_operand:<VCOND> 2 "register_operand" "w"))
2450 (vec_duplicate:<VWIDE>
2451 (ANY_EXTEND:<VWIDE_S>
2452 (vec_select:<VEL>
2453 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2454 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2455 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2456 "TARGET_SIMD"
2457 {
2458 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2459 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2460 }
2461 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2462 )
2463
2464 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2465 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2466 (plus:<VWIDE>
2467 (mult:<VWIDE>
2468 (ANY_EXTEND:<VWIDE>
2469 (vec_select:<VHALF>
2470 (match_operand:VQ_HSI 2 "register_operand" "w")
2471 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2472 (vec_duplicate:<VWIDE>
2473 (ANY_EXTEND:<VWIDE_S>
2474 (vec_select:<VEL>
2475 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2476 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2477 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2478 "TARGET_SIMD"
2479 {
2480 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2481 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2482 }
2483 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2484 )
2485
2486 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2487 [(match_operand:<VWIDE> 0 "register_operand")
2488 (match_operand:<VWIDE> 1 "register_operand")
2489 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2490 (match_operand:<VCOND> 3 "register_operand")
2491 (match_operand:SI 4 "immediate_operand")]
2492 "TARGET_SIMD"
2493 {
2494 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2495 emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2496 operands[1], operands[2], p, operands[3], operands[4]));
2497 DONE;
2498 }
2499 )
2500
2501 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2502 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2503 (plus:<VWIDE>
2504 (mult:<VWIDE>
2505 (ANY_EXTEND:<VWIDE>
2506 (vec_select:<VHALF>
2507 (match_operand:VQ_HSI 2 "register_operand" "w")
2508 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2509 (vec_duplicate:<VWIDE>
2510 (ANY_EXTEND:<VWIDE_S>
2511 (vec_select:<VEL>
2512 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2513 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2514 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2515 "TARGET_SIMD"
2516 {
2517 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2518 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2519 }
2520 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2521 )
2522
2523 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2524 [(match_operand:<VWIDE> 0 "register_operand")
2525 (match_operand:<VWIDE> 1 "register_operand")
2526 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2527 (match_operand:<VCONQ> 3 "register_operand")
2528 (match_operand:SI 4 "immediate_operand")]
2529 "TARGET_SIMD"
2530 {
2531 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2532 emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2533 operands[1], operands[2], p, operands[3], operands[4]));
2534 DONE;
2535 }
2536 )
2537
2538 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2539 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2540 (minus:<VWIDE>
2541 (match_operand:<VWIDE> 1 "register_operand" "0")
2542 (mult:<VWIDE>
2543 (ANY_EXTEND:<VWIDE>
2544 (match_operand:<VCOND> 2 "register_operand" "w"))
2545 (vec_duplicate:<VWIDE>
2546 (ANY_EXTEND:<VWIDE_S>
2547 (vec_select:<VEL>
2548 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2549 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2550 "TARGET_SIMD"
2551 {
2552 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2553 return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2554 }
2555 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2556 )
2557
2558 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2559 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2560 (minus:<VWIDE>
2561 (match_operand:<VWIDE> 1 "register_operand" "0")
2562 (mult:<VWIDE>
2563 (ANY_EXTEND:<VWIDE>
2564 (vec_select:<VHALF>
2565 (match_operand:VQ_HSI 2 "register_operand" "w")
2566 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2567 (vec_duplicate:<VWIDE>
2568 (ANY_EXTEND:<VWIDE_S>
2569 (vec_select:<VEL>
2570 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2571 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2572 )))]
2573 "TARGET_SIMD"
2574 {
2575 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2576 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2577 }
2578 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2579 )
2580
2581 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2582 [(match_operand:<VWIDE> 0 "register_operand")
2583 (match_operand:<VWIDE> 1 "register_operand")
2584 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2585 (match_operand:<VCOND> 3 "register_operand")
2586 (match_operand:SI 4 "immediate_operand")]
2587 "TARGET_SIMD"
2588 {
2589 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2590 emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2591 operands[1], operands[2], p, operands[3], operands[4]));
2592 DONE;
2593 }
2594 )
2595
2596 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2598 (minus:<VWIDE>
2599 (match_operand:<VWIDE> 1 "register_operand" "0")
2600 (mult:<VWIDE>
2601 (ANY_EXTEND:<VWIDE>
2602 (vec_select:<VHALF>
2603 (match_operand:VQ_HSI 2 "register_operand" "w")
2604 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2605 (vec_duplicate:<VWIDE>
2606 (ANY_EXTEND:<VWIDE_S>
2607 (vec_select:<VEL>
2608 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2609 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2610 )))]
2611 "TARGET_SIMD"
2612 {
2613 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2614 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2615 }
2616 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2617 )
2618
2619 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2620 [(match_operand:<VWIDE> 0 "register_operand")
2621 (match_operand:<VWIDE> 1 "register_operand")
2622 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2623 (match_operand:<VCONQ> 3 "register_operand")
2624 (match_operand:SI 4 "immediate_operand")]
2625 "TARGET_SIMD"
2626 {
2627 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2628 emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2629 operands[1], operands[2], p, operands[3], operands[4]));
2630 DONE;
2631 }
2632 )
2633
2634 ;; FP vector operations.
2635 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
2636 ;; double-precision (64-bit) floating-point data types and arithmetic as
2637 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
2638 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2639 ;;
2640 ;; Floating-point operations can raise an exception. Vectorising such
2641 ;; operations is safe for the reasons explained below.
2642 ;;
2643 ;; ARMv8 permits an extension to enable trapped floating-point
2644 ;; exception handling; however, this is an optional feature. In the
2645 ;; event of a floating-point exception being raised by vectorised
2646 ;; code then:
2647 ;; 1. If trapped floating-point exceptions are available, then a trap
2648 ;; will be taken when any lane raises an enabled exception. A trap
2649 ;; handler may determine which lane raised the exception.
2650 ;; 2. Otherwise, a sticky exception flag is set in the floating-point
2651 ;; status register (FPSR). If software explicitly tests the exception
2652 ;; flags inside a vectorisable region, those tests prevent
2653 ;; vectorisation and allow precise identification of the failing
2654 ;; operation; if the flags are only tested outside vectorisable
2655 ;; regions, the specific operation and lane are not of interest.
2656
2657 ;; FP arithmetic operations.
2658
2659 (define_insn "add<mode>3<vczle><vczbe>"
2660 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2661 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2662 (match_operand:VHSDF 2 "register_operand" "w")))]
2663 "TARGET_SIMD"
2664 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2665 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2666 )
2667
2668 (define_insn "sub<mode>3<vczle><vczbe>"
2669 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2670 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2671 (match_operand:VHSDF 2 "register_operand" "w")))]
2672 "TARGET_SIMD"
2673 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2674 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2675 )
2676
2677 (define_insn "mul<mode>3<vczle><vczbe>"
2678 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2679 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2680 (match_operand:VHSDF 2 "register_operand" "w")))]
2681 "TARGET_SIMD"
2682 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2683 [(set_attr "type" "neon_fp_mul_<stype><q>")]
2684 )
2685
2686 (define_expand "div<mode>3"
2687 [(set (match_operand:VHSDF 0 "register_operand")
2688 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2689 (match_operand:VHSDF 2 "register_operand")))]
2690 "TARGET_SIMD"
2691 {
2692 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2693 DONE;
2694
2695 operands[1] = force_reg (<MODE>mode, operands[1]);
2696 })
2697
2698 (define_insn "*div<mode>3<vczle><vczbe>"
2699 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2700 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2701 (match_operand:VHSDF 2 "register_operand" "w")))]
2702 "TARGET_SIMD"
2703 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2704 [(set_attr "type" "neon_fp_div_<stype><q>")]
2705 )
2706
2707 ;; SVE has vector integer division, unlike Advanced SIMD.
2708 ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
2709 ;; optabs to the midend.
2710 (define_expand "<su_optab>div<mode>3"
2711 [(set (match_operand:VQDIV 0 "register_operand")
2712 (ANY_DIV:VQDIV
2713 (match_operand:VQDIV 1 "register_operand")
2714 (match_operand:VQDIV 2 "register_operand")))]
2715 "TARGET_SVE"
2716 {
2717 machine_mode sve_mode
2718 = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
2719 rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
2720 rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
2721 rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);
2722
2723 emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
2724 DONE;
2725 }
2726 )
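;; A hedged sketch of the effect, using GNU C vector types:
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   v4si div4 (v4si a, v4si b) { return a / b; }
;; With SVE available this becomes a predicated divide on the SVE views of
;; the 128-bit registers (e.g. "ptrue p0.s" followed by
;; "sdiv z0.s, p0/m, z0.s, z1.s") instead of four scalar divisions.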
2727
2728 (define_insn "neg<mode>2<vczle><vczbe>"
2729 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2730 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2731 "TARGET_SIMD"
2732 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2733 [(set_attr "type" "neon_fp_neg_<stype><q>")]
2734 )
2735
2736 (define_insn "abs<mode>2<vczle><vczbe>"
2737 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2738 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2739 "TARGET_SIMD"
2740 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2741 [(set_attr "type" "neon_fp_abs_<stype><q>")]
2742 )
2743
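;; The aarch64_float_mla* and aarch64_float_mls* expanders below
;; deliberately emit a separate multiply followed by an add or subtract
;; rather than a fused fma, so the intermediate product is rounded; this
;; matches the semantics of the non-fused vmla/vmls-style intrinsics they
;; implement.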
2744 (define_expand "aarch64_float_mla<mode>"
2745 [(set (match_operand:VDQF_DF 0 "register_operand")
2746 (plus:VDQF_DF
2747 (mult:VDQF_DF
2748 (match_operand:VDQF_DF 2 "register_operand")
2749 (match_operand:VDQF_DF 3 "register_operand"))
2750 (match_operand:VDQF_DF 1 "register_operand")))]
2751 "TARGET_SIMD"
2752 {
2753 rtx scratch = gen_reg_rtx (<MODE>mode);
2754 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2755 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2756 DONE;
2757 }
2758 )
2759
2760 (define_expand "aarch64_float_mls<mode>"
2761 [(set (match_operand:VDQF_DF 0 "register_operand")
2762 (minus:VDQF_DF
2763 (match_operand:VDQF_DF 1 "register_operand")
2764 (mult:VDQF_DF
2765 (match_operand:VDQF_DF 2 "register_operand")
2766 (match_operand:VDQF_DF 3 "register_operand"))))]
2767 "TARGET_SIMD"
2768 {
2769 rtx scratch = gen_reg_rtx (<MODE>mode);
2770 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2771 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2772 DONE;
2773 }
2774 )
2775
2776 (define_expand "aarch64_float_mla_n<mode>"
2777 [(set (match_operand:VDQSF 0 "register_operand")
2778 (plus:VDQSF
2779 (mult:VDQSF
2780 (vec_duplicate:VDQSF
2781 (match_operand:<VEL> 3 "register_operand"))
2782 (match_operand:VDQSF 2 "register_operand"))
2783 (match_operand:VDQSF 1 "register_operand")))]
2784 "TARGET_SIMD"
2785 {
2786 rtx scratch = gen_reg_rtx (<MODE>mode);
2787 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2788 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2789 DONE;
2790 }
2791 )
2792
2793 (define_expand "aarch64_float_mls_n<mode>"
2794 [(set (match_operand:VDQSF 0 "register_operand")
2795 (minus:VDQSF
2796 (match_operand:VDQSF 1 "register_operand")
2797 (mult:VDQSF
2798 (vec_duplicate:VDQSF
2799 (match_operand:<VEL> 3 "register_operand"))
2800 (match_operand:VDQSF 2 "register_operand"))))]
2801 "TARGET_SIMD"
2802 {
2803 rtx scratch = gen_reg_rtx (<MODE>mode);
2804 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2805 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2806 DONE;
2807 }
2808 )
2809
2810 (define_expand "aarch64_float_mla_lane<mode>"
2811 [(set (match_operand:VDQSF 0 "register_operand")
2812 (plus:VDQSF
2813 (mult:VDQSF
2814 (vec_duplicate:VDQSF
2815 (vec_select:<VEL>
2816 (match_operand:V2SF 3 "register_operand")
2817 (parallel [(match_operand:SI 4 "immediate_operand")])))
2818 (match_operand:VDQSF 2 "register_operand"))
2819 (match_operand:VDQSF 1 "register_operand")))]
2820 "TARGET_SIMD"
2821 {
2822 rtx scratch = gen_reg_rtx (<MODE>mode);
2823 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2824 operands[3], operands[4]));
2825 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2826 DONE;
2827 }
2828 )
2829
2830 (define_expand "aarch64_float_mls_lane<mode>"
2831 [(set (match_operand:VDQSF 0 "register_operand")
2832 (minus:VDQSF
2833 (match_operand:VDQSF 1 "register_operand")
2834 (mult:VDQSF
2835 (vec_duplicate:VDQSF
2836 (vec_select:<VEL>
2837 (match_operand:V2SF 3 "register_operand")
2838 (parallel [(match_operand:SI 4 "immediate_operand")])))
2839 (match_operand:VDQSF 2 "register_operand"))))]
2840 "TARGET_SIMD"
2841 {
2842 rtx scratch = gen_reg_rtx (<MODE>mode);
2843 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2844 operands[3], operands[4]));
2845 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2846 DONE;
2847 }
2848 )
2849
2850 (define_expand "aarch64_float_mla_laneq<mode>"
2851 [(set (match_operand:VDQSF 0 "register_operand")
2852 (plus:VDQSF
2853 (mult:VDQSF
2854 (vec_duplicate:VDQSF
2855 (vec_select:<VEL>
2856 (match_operand:V4SF 3 "register_operand")
2857 (parallel [(match_operand:SI 4 "immediate_operand")])))
2858 (match_operand:VDQSF 2 "register_operand"))
2859 (match_operand:VDQSF 1 "register_operand")))]
2860 "TARGET_SIMD"
2861 {
2862 rtx scratch = gen_reg_rtx (<MODE>mode);
2863 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2864 operands[3], operands[4]));
2865 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2866 DONE;
2867 }
2868 )
2869
2870 (define_expand "aarch64_float_mls_laneq<mode>"
2871 [(set (match_operand:VDQSF 0 "register_operand")
2872 (minus:VDQSF
2873 (match_operand:VDQSF 1 "register_operand")
2874 (mult:VDQSF
2875 (vec_duplicate:VDQSF
2876 (vec_select:<VEL>
2877 (match_operand:V4SF 3 "register_operand")
2878 (parallel [(match_operand:SI 4 "immediate_operand")])))
2879 (match_operand:VDQSF 2 "register_operand"))))]
2880 "TARGET_SIMD"
2881 {
2882 rtx scratch = gen_reg_rtx (<MODE>mode);
2883 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2884 operands[3], operands[4]));
2885 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2886 DONE;
2887 }
2888 )
2889
2890 (define_insn "fma<mode>4<vczle><vczbe>"
2891 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2892 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2893 (match_operand:VHSDF 2 "register_operand" "w")
2894 (match_operand:VHSDF 3 "register_operand" "0")))]
2895 "TARGET_SIMD"
2896 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2897 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2898 )
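;; Note the operand order: operand 3 is the accumulator and is tied to
;; the output register. A minimal sketch, assuming <arm_neon.h>:
;;   float32x4_t fused (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   { return vfmaq_f32 (acc, a, b); } /* fmla v0.4s, v1.4s, v2.4s */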
2899
2900 (define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
2901 [(set (match_operand:VDQF 0 "register_operand" "=w")
2902 (fma:VDQF
2903 (vec_duplicate:VDQF
2904 (vec_select:<VEL>
2905 (match_operand:VDQF 1 "register_operand" "<h_con>")
2906 (parallel [(match_operand:SI 2 "immediate_operand")])))
2907 (match_operand:VDQF 3 "register_operand" "w")
2908 (match_operand:VDQF 4 "register_operand" "0")))]
2909 "TARGET_SIMD"
2910 {
2911 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2912 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2913 }
2914 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2915 )
2916
2917 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2918 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2919 (fma:VDQSF
2920 (vec_duplicate:VDQSF
2921 (vec_select:<VEL>
2922 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2923 (parallel [(match_operand:SI 2 "immediate_operand")])))
2924 (match_operand:VDQSF 3 "register_operand" "w")
2925 (match_operand:VDQSF 4 "register_operand" "0")))]
2926 "TARGET_SIMD"
2927 {
2928 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2929 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2930 }
2931 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2932 )
2933
2934 (define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
2935 [(set (match_operand:VMUL 0 "register_operand" "=w")
2936 (fma:VMUL
2937 (vec_duplicate:VMUL
2938 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2939 (match_operand:VMUL 2 "register_operand" "w")
2940 (match_operand:VMUL 3 "register_operand" "0")))]
2941 "TARGET_SIMD"
2942 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2943 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2944 )
2945
2946 (define_insn "*aarch64_fma4_elt_to_64v2df"
2947 [(set (match_operand:DF 0 "register_operand" "=w")
2948 (fma:DF
2949 (vec_select:DF
2950 (match_operand:V2DF 1 "register_operand" "w")
2951 (parallel [(match_operand:SI 2 "immediate_operand")]))
2952 (match_operand:DF 3 "register_operand" "w")
2953 (match_operand:DF 4 "register_operand" "0")))]
2954 "TARGET_SIMD"
2955 {
2956 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2957 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2958 }
2959 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2960 )
2961
2962 (define_insn "fnma<mode>4<vczle><vczbe>"
2963 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2964 (fma:VHSDF
2965 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2966 (match_operand:VHSDF 2 "register_operand" "w")
2967 (match_operand:VHSDF 3 "register_operand" "0")))]
2968 "TARGET_SIMD"
2969 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2970 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2971 )
2972
2973 (define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
2974 [(set (match_operand:VDQF 0 "register_operand" "=w")
2975 (fma:VDQF
2976 (neg:VDQF
2977 (match_operand:VDQF 3 "register_operand" "w"))
2978 (vec_duplicate:VDQF
2979 (vec_select:<VEL>
2980 (match_operand:VDQF 1 "register_operand" "<h_con>")
2981 (parallel [(match_operand:SI 2 "immediate_operand")])))
2982 (match_operand:VDQF 4 "register_operand" "0")))]
2983 "TARGET_SIMD"
2984 {
2985 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2986 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2987 }
2988 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2989 )
2990
2991 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2992 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2993 (fma:VDQSF
2994 (neg:VDQSF
2995 (match_operand:VDQSF 3 "register_operand" "w"))
2996 (vec_duplicate:VDQSF
2997 (vec_select:<VEL>
2998 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2999 (parallel [(match_operand:SI 2 "immediate_operand")])))
3000 (match_operand:VDQSF 4 "register_operand" "0")))]
3001 "TARGET_SIMD"
3002 {
3003 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3004 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3005 }
3006 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3007 )
3008
3009 (define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
3010 [(set (match_operand:VMUL 0 "register_operand" "=w")
3011 (fma:VMUL
3012 (neg:VMUL
3013 (match_operand:VMUL 2 "register_operand" "w"))
3014 (vec_duplicate:VMUL
3015 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3016 (match_operand:VMUL 3 "register_operand" "0")))]
3017 "TARGET_SIMD"
3018 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3019 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
3020 )
3021
3022 (define_insn "*aarch64_fnma4_elt_to_64v2df"
3023 [(set (match_operand:DF 0 "register_operand" "=w")
3024 (fma:DF
3025 (vec_select:DF
3026 (match_operand:V2DF 1 "register_operand" "w")
3027 (parallel [(match_operand:SI 2 "immediate_operand")]))
3028 (neg:DF
3029 (match_operand:DF 3 "register_operand" "w"))
3030 (match_operand:DF 4 "register_operand" "0")))]
3031 "TARGET_SIMD"
3032 {
3033 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3034 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
3035 }
3036 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
3037 )
3038
3039 ;; Vector versions of the floating-point frint patterns.
3040 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
3041 (define_insn "<frint_pattern><mode>2<vczle><vczbe>"
3042 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3043 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3044 FRINT))]
3045 "TARGET_SIMD"
3046 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3047 [(set_attr "type" "neon_fp_round_<stype><q>")]
3048 )
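;; The mapping from standard pattern name to instruction is: btrunc ->
;; frintz, ceil -> frintp, floor -> frintm, nearbyint -> frinti,
;; rint -> frintx, round -> frinta and frintn -> frintn.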
3049
3050 ;; Vector versions of the fcvt standard patterns.
3051 ;; Expands to lbtrunc, lround, lceil, lfloor
3052 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3053 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3054 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3055 [(match_operand:VHSDF 1 "register_operand" "w")]
3056 FCVT)))]
3057 "TARGET_SIMD"
3058 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3059 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3060 )
3061
3062 ;; HF Scalar variants of related SIMD instructions.
3063 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3064 [(set (match_operand:HI 0 "register_operand" "=w")
3065 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3066 FCVT)))]
3067 "TARGET_SIMD_F16INST"
3068 "fcvt<frint_suffix><su>\t%h0, %h1"
3069 [(set_attr "type" "neon_fp_to_int_s")]
3070 )
3071
3072 (define_insn "<optab>_trunchfhi2"
3073 [(set (match_operand:HI 0 "register_operand" "=w")
3074 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3075 "TARGET_SIMD_F16INST"
3076 "fcvtz<su>\t%h0, %h1"
3077 [(set_attr "type" "neon_fp_to_int_s")]
3078 )
3079
3080 (define_insn "<optab>hihf2"
3081 [(set (match_operand:HF 0 "register_operand" "=w")
3082 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3083 "TARGET_SIMD_F16INST"
3084 "<su_optab>cvtf\t%h0, %h1"
3085 [(set_attr "type" "neon_int_to_fp_s")]
3086 )
3087
3088 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3089 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3090 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3091 [(mult:VDQF
3092 (match_operand:VDQF 1 "register_operand" "w")
3093 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3094 UNSPEC_FRINTZ)))]
3095 "TARGET_SIMD
3096 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3097 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3098 {
3099 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3100 char buf[64];
3101 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3102 output_asm_insn (buf, operands);
3103 return "";
3104 }
3105 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3106 )
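;; That is, a truncating float-to-int conversion whose input is scaled by
;; a power of two is fused into a single fixed-point convert. A hedged C
;; sketch for V4SF, assuming <arm_neon.h>:
;;   int32x4_t cvt (float32x4_t x)
;;   { return vcvtq_n_s32_f32 (x, 16); } /* fcvtzs v0.4s, v1.4s, #16 */
;; which corresponds, up to saturation, to "(int) (x[i] * 65536.0f)".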
3107
3108 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3109 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3110 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3111 [(match_operand:VHSDF 1 "register_operand")]
3112 UNSPEC_FRINTZ)))]
3113 "TARGET_SIMD"
3114 {})
3115
3116 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3117 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3118 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3119 [(match_operand:VHSDF 1 "register_operand")]
3120 UNSPEC_FRINTZ)))]
3121 "TARGET_SIMD"
3122 {})
3123
3124 (define_expand "ftrunc<VHSDF:mode>2"
3125 [(set (match_operand:VHSDF 0 "register_operand")
3126 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3127 UNSPEC_FRINTZ))]
3128 "TARGET_SIMD"
3129 {})
3130
3131 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3132 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3133 (FLOATUORS:VHSDF
3134 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3135 "TARGET_SIMD"
3136 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3137 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3138 )
3139
3140 ;; Conversions between vectors of floats and doubles.
3141 ;; Contains a mix of patterns to match standard pattern names
3142 ;; and those for intrinsics.
3143
3144 ;; Float widening operations.
3145
3146 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3147 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148 (float_extend:<VWIDE> (vec_select:<VHALF>
3149 (match_operand:VQ_HSF 1 "register_operand" "w")
3150 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3151 )))]
3152 "TARGET_SIMD"
3153 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3154 [(set_attr "type" "neon_fp_cvt_widen_s")]
3155 )
3156
3157 ;; Convert between fixed-point and floating-point (vector modes)
3158
3159 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3160 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3161 (unspec:<VHSDF:FCVT_TARGET>
3162 [(match_operand:VHSDF 1 "register_operand" "w")
3163 (match_operand:SI 2 "immediate_operand" "i")]
3164 FCVT_F2FIXED))]
3165 "TARGET_SIMD"
3166 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3167 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
3168 )
3169
3170 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3171 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3172 (unspec:<VDQ_HSDI:FCVT_TARGET>
3173 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3174 (match_operand:SI 2 "immediate_operand" "i")]
3175 FCVT_FIXED2F))]
3176 "TARGET_SIMD"
3177 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3178 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3179 )
3180
3181 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3182 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3183 ;; the meaning of HI and LO changes depending on the target endianness.
3184 ;; While elsewhere we map the higher numbered elements of a vector to
3185 ;; the lower architectural lanes of the vector, for these patterns we want
3186 ;; to always treat "hi" as referring to the higher architectural lanes.
3187 ;; Consequently, while the patterns below look inconsistent with our
3188 ;; other big-endian patterns, their behaviour is as required.
3189
3190 (define_expand "vec_unpacks_lo_<mode>"
3191 [(match_operand:<VWIDE> 0 "register_operand")
3192 (match_operand:VQ_HSF 1 "register_operand")]
3193 "TARGET_SIMD"
3194 {
3195 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3196 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3197 operands[1], p));
3198 DONE;
3199 }
3200 )
3201
3202 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3203 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3204 (float_extend:<VWIDE> (vec_select:<VHALF>
3205 (match_operand:VQ_HSF 1 "register_operand" "w")
3206 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3207 )))]
3208 "TARGET_SIMD"
3209 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3210 [(set_attr "type" "neon_fp_cvt_widen_s")]
3211 )
3212
3213 (define_expand "vec_unpacks_hi_<mode>"
3214 [(match_operand:<VWIDE> 0 "register_operand")
3215 (match_operand:VQ_HSF 1 "register_operand")]
3216 "TARGET_SIMD"
3217 {
3218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3219 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
3220 operands[1], p));
3221 DONE;
3222 }
3223 )
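
;; An illustrative sketch (hypothetical function): a float-to-double
;; widening loop can be vectorized through vec_unpacks_lo/vec_unpacks_hi,
;; i.e. an fcvtl/fcvtl2 pair per input vector:
;;
;;   void widen (double *out, const float *in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (double) in[i];
;;   }
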
3224 (define_insn "aarch64_float_extend_lo_<Vwide>"
3225 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3226 (float_extend:<VWIDE>
3227 (match_operand:VDF 1 "register_operand" "w")))]
3228 "TARGET_SIMD"
3229 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3230 [(set_attr "type" "neon_fp_cvt_widen_s")]
3231 )
3232
3233 ;; Float narrowing operations.
3234
3235 (define_insn "aarch64_float_trunc_rodd_df"
3236 [(set (match_operand:SF 0 "register_operand" "=w")
3237 (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3238 UNSPEC_FCVTXN))]
3239 "TARGET_SIMD"
3240 "fcvtxn\\t%s0, %d1"
3241 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3242 )
3243
3244 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3245 [(set (match_operand:V2SF 0 "register_operand" "=w")
3246 (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3247 UNSPEC_FCVTXN))]
3248 "TARGET_SIMD"
3249 "fcvtxn\\t%0.2s, %1.2d"
3250 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3251 )
3252
3253 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3254 [(set (match_operand:V4SF 0 "register_operand" "=w")
3255 (vec_concat:V4SF
3256 (match_operand:V2SF 1 "register_operand" "0")
3257 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3258 UNSPEC_FCVTXN)))]
3259 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3260 "fcvtxn2\\t%0.4s, %2.2d"
3261 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3262 )
3263
3264 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3265 [(set (match_operand:V4SF 0 "register_operand" "=w")
3266 (vec_concat:V4SF
3267 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3268 UNSPEC_FCVTXN)
3269 (match_operand:V2SF 1 "register_operand" "0")))]
3270 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3271 "fcvtxn2\\t%0.4s, %2.2d"
3272 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3273 )
3274
3275 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3276 [(match_operand:V4SF 0 "register_operand")
3277 (match_operand:V2SF 1 "register_operand")
3278 (match_operand:V2DF 2 "register_operand")]
3279 "TARGET_SIMD"
3280 {
3281 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3282 ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3283 : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3284 emit_insn (gen (operands[0], operands[1], operands[2]));
3285 DONE;
3286 }
3287 )
3288
3289 (define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
3290 [(set (match_operand:VDF 0 "register_operand" "=w")
3291 (float_truncate:VDF
3292 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3293 "TARGET_SIMD"
3294 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3295 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3296 )
3297
3298 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3299 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3300 (vec_concat:<VDBL>
3301 (match_operand:VDF 1 "register_operand" "0")
3302 (float_truncate:VDF
3303 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3304 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3305 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3306 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3307 )
3308
3309 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3310 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3311 (vec_concat:<VDBL>
3312 (float_truncate:VDF
3313 (match_operand:<VWIDE> 2 "register_operand" "w"))
3314 (match_operand:VDF 1 "register_operand" "0")))]
3315 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3316 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3317 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3318 )
3319
3320 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3321 [(match_operand:<VDBL> 0 "register_operand")
3322 (match_operand:VDF 1 "register_operand")
3323 (match_operand:<VWIDE> 2 "register_operand")]
3324 "TARGET_SIMD"
3325 {
3326 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3327 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3328 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3329 emit_insn (gen (operands[0], operands[1], operands[2]));
3330 DONE;
3331 }
3332 )
3333
3334 (define_expand "vec_pack_trunc_v2df"
3335 [(set (match_operand:V4SF 0 "register_operand")
3336 (vec_concat:V4SF
3337 (float_truncate:V2SF
3338 (match_operand:V2DF 1 "register_operand"))
3339 (float_truncate:V2SF
3340 (match_operand:V2DF 2 "register_operand"))
3341 ))]
3342 "TARGET_SIMD"
3343 {
3344 rtx tmp = gen_reg_rtx (V2SFmode);
3345 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3346 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3347
3348 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
3349 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3350 tmp, operands[hi]));
3351 DONE;
3352 }
3353 )
3354
3355 (define_expand "vec_pack_trunc_df"
3356 [(set (match_operand:V2SF 0 "register_operand")
3357 (vec_concat:V2SF
3358 (float_truncate:SF (match_operand:DF 1 "general_operand"))
3359 (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
3360 "TARGET_SIMD"
3361 {
3362 rtx tmp = gen_reg_rtx (V2SFmode);
3363 emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
3364 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
3365 DONE;
3366 }
3367 )
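
;; An illustrative sketch (hypothetical function): a double-to-float
;; narrowing loop can be vectorized through vec_pack_trunc_v2df, i.e.
;; fcvtn followed by fcvtn2 filling the two halves of one V4SF result:
;;
;;   void narrow (float *out, const double *in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];
;;   }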
3368
3369 ;; FP Max/Min
3370 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
3371 ;; expression like:
3372 ;; a = (b < c) ? b : c;
3373 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3374 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3375 ;; -ffast-math.
3376 ;;
3377 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3378 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3379 ;; operand will be returned when both operands are zero (i.e. they may not
3380 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
3381 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3382 ;; NaNs.
3383
3384 (define_insn "<su><maxmin><mode>3"
3385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3386 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3387 (match_operand:VHSDF 2 "register_operand" "w")))]
3388 "TARGET_SIMD"
3389 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3390 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3391 )
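
;; An illustrative sketch (hypothetical function): under -ffast-math (or
;; -ffinite-math-only -fno-signed-zeros) the ternary below is recognized
;; as MIN_EXPR and can map to the smin pattern above, i.e. fminnm:
;;
;;   void vmin (float *r, const float *a, const float *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] < b[i]) ? a[i] : b[i];
;;   }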
3392
3393 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3394 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3395 ;; which implement the IEEE fmax ()/fmin () functions.
3396 (define_insn "<fmaxmin><mode>3<vczle><vczbe>"
3397 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3398 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3399 (match_operand:VHSDF 2 "register_operand" "w")]
3400 FMAXMIN_UNS))]
3401 "TARGET_SIMD"
3402 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3403 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3404 )
3405
3406 ;; 'across lanes' add.
3407
3408 (define_insn "aarch64_faddp<mode><vczle><vczbe>"
3409 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3410 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3411 (match_operand:VHSDF 2 "register_operand" "w")]
3412 UNSPEC_FADDV))]
3413 "TARGET_SIMD"
3414 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3415 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3416 )
3417
3418 (define_insn "reduc_plus_scal_<mode>"
3419 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3420 (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
3421 UNSPEC_ADDV))]
3422 "TARGET_SIMD"
3423 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3424 [(set_attr "type" "neon_reduc_add<q>")]
3425 )
3426
3427 (define_insn "reduc_plus_scal_v2si"
3428 [(set (match_operand:SI 0 "register_operand" "=w")
3429 (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
3430 UNSPEC_ADDV))]
3431 "TARGET_SIMD"
3432 "addp\\t%0.2s, %1.2s, %1.2s"
3433 [(set_attr "type" "neon_reduc_add")]
3434 )
3435
3436 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3437 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3438 [(set (match_operand:GPI 0 "register_operand" "=w")
3439 (zero_extend:GPI
3440 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3441 UNSPEC_ADDV)))]
3442 "TARGET_SIMD"
3443 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3444 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3445 )
3446
3447 (define_insn "reduc_plus_scal_<mode>"
3448 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3449 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3450 UNSPEC_FADDV))]
3451 "TARGET_SIMD"
3452 "faddp\\t%<Vetype>0, %1.<Vtype>"
3453 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3454 )
3455
3456 (define_expand "reduc_plus_scal_v4sf"
3457 [(set (match_operand:SF 0 "register_operand")
3458 (unspec:SF [(match_operand:V4SF 1 "register_operand")]
3459 UNSPEC_FADDV))]
3460 "TARGET_SIMD"
3461 {
3462 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3463 rtx scratch = gen_reg_rtx (V4SFmode);
3464 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3465 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3466 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
3467 DONE;
3468 })
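
;; An illustrative sketch (hypothetical function): a vectorized sum
;; reduction ends in one of the reduc_plus patterns above, e.g. a final
;; addv of the vector accumulator for:
;;
;;   int sum (const int *a, int n)
;;   {
;;     int s = 0;
;;     for (int i = 0; i < n; i++)
;;       s += a[i];
;;     return s;
;;   }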
3469
3470 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
3471 ;; sign or zero-extends its elements.
3472 (define_insn "aarch64_<su>addlv<mode>"
3473 [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3474 (unspec:<VWIDE_S>
3475 [(ANY_EXTEND:<V2XWIDE>
3476 (match_operand:VDQV_L 1 "register_operand" "w"))]
3477 UNSPEC_ADDV))]
3478 "TARGET_SIMD"
3479 "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3480 [(set_attr "type" "neon_reduc_add<q>")]
3481 )
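
;; An illustrative sketch: the arm_neon.h widening reductions map directly
;; onto this pattern, e.g. uaddlv for:
;;
;;   #include <arm_neon.h>
;;   uint16_t sum8 (uint8x8_t v)
;;   {
;;     return vaddlv_u8 (v);   /* uaddlv h0, v0.8b */
;;   }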
3482
3483 ;; An ADDV over a vector PLUS of elements extracted and widened all from the
3484 ;; same vector is the same as an [SU]ADDLV above, so long as all the elements
3485 ;; of that vector are used. We can greatly simplify the RTL expression using
3486 ;; this splitter.
3487 (define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
3488 [(set (match_operand:<VWIDE_S> 0 "register_operand")
3489 (unspec:<VWIDE_S>
3490 [(plus:<VDBLW>
3491 (vec_select:<VDBLW>
3492 (ANY_EXTEND:<V2XWIDE>
3493 (match_operand:VDQV_L 1 "register_operand"))
3494 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3495 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3496 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
3497 UNSPEC_ADDV))]
3498 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3499 "#"
3500 "&& 1"
3501 [(set (match_dup 0)
3502 (unspec:<VWIDE_S>
3503 [(ANY_EXTEND:<V2XWIDE>
3504 (match_dup 1))]
3505 UNSPEC_ADDV))]
3506 {}
3507 )
3508
3509 ;; Similar to the above but for two-step zero-widening reductions.
3510 ;; We can push the outer zero_extend outside the ADDV unspec and make
3511 ;; use of the implicit high-part zeroing semantics of UADDLV to do it all
3512 ;; in a single instruction.
3513 (define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
3514 [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
3515 (unspec:<VWIDE2X_S>
3516 [(zero_extend:<VQUADW>
3517 (plus:<VDBLW>
3518 (vec_select:<VDBLW>
3519 (zero_extend:<V2XWIDE>
3520 (match_operand:VDQQH 1 "register_operand" "w"))
3521 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3522 (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
3523 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
3524 UNSPEC_ADDV))]
3525 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3526 "#"
3527 "&& 1"
3528 [(set (match_dup 0)
3529 (zero_extend:<VWIDE2X_S>
3530 (unspec:<VWIDE_S>
3531 [(zero_extend:<V2XWIDE>
3532 (match_dup 1))]
3533 UNSPEC_ADDV)))]
3534 {}
3535 )
3536
3537 ;; Zero-extending version of the above. As these intrinsics produce a scalar
3538 ;; value that may be used by further intrinsics, we want to avoid moving the
3539 ;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.
3540
3541 (define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
3542 [(set (match_operand:GPI 0 "register_operand" "=w")
3543 (zero_extend:GPI
3544 (unspec:<VWIDE_S>
3545 [(ANY_EXTEND:<VDQV_L:V2XWIDE>
3546 (match_operand:VDQV_L 1 "register_operand" "w"))]
3547 UNSPEC_ADDV)))]
3548 "TARGET_SIMD
3549 && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
3550 "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
3551 [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
3552 )
3553
3554 (define_expand "aarch64_<su>addlp<mode>"
3555 [(set (match_operand:<VDBLW> 0 "register_operand")
3556 (plus:<VDBLW>
3557 (vec_select:<VDBLW>
3558 (ANY_EXTEND:<V2XWIDE>
3559 (match_operand:VDQV_L 1 "register_operand"))
3560 (match_dup 2))
3561 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3562 (match_dup 3))))]
3563 "TARGET_SIMD"
3564 {
3565 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
3566 operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
3567 operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
3568 }
3569 )
3570
3571 (define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
3572 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3573 (plus:<VDBLW>
3574 (vec_select:<VDBLW>
3575 (ANY_EXTEND:<V2XWIDE>
3576 (match_operand:VDQV_L 1 "register_operand" "w"))
3577 (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
3578 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3579 (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
3580 "TARGET_SIMD
3581 && !rtx_equal_p (operands[2], operands[3])"
3582 "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3583 [(set_attr "type" "neon_reduc_add<q>")]
3584 )
3585
3586 (define_insn "clrsb<mode>2<vczle><vczbe>"
3587 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3588 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3589 "TARGET_SIMD"
3590 "cls\\t%0.<Vtype>, %1.<Vtype>"
3591 [(set_attr "type" "neon_cls<q>")]
3592 )
3593
3594 (define_insn "clz<mode>2<vczle><vczbe>"
3595 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3596 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3597 "TARGET_SIMD"
3598 "clz\\t%0.<Vtype>, %1.<Vtype>"
3599 [(set_attr "type" "neon_cls<q>")]
3600 )
3601
3602 (define_insn "popcount<mode>2<vczle><vczbe>"
3603 [(set (match_operand:VB 0 "register_operand" "=w")
3604 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3605 "TARGET_SIMD"
3606 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3607 [(set_attr "type" "neon_cnt<q>")]
3608 )
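
;; An illustrative sketch (assuming the generic AdvSIMD expansion): scalar
;; population count uses cnt on the byte vector and then the zero-extending
;; ADDV pattern above (fmov; cnt v0.8b; addv b0, v0.8b; fmov back):
;;
;;   int popcount64 (unsigned long long x)
;;   {
;;     return __builtin_popcountll (x);
;;   }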
3609
3610 ;; 'across lanes' max and min ops.
3611
3612 ;; Template for outputting a scalar, so we can create __builtins which can be
3613 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
3614 (define_expand "reduc_<optab>_scal_<mode>"
3615 [(match_operand:<VEL> 0 "register_operand")
3616 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3617 FMAXMINV)]
3618 "TARGET_SIMD"
3619 {
3620 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3621 rtx scratch = gen_reg_rtx (<MODE>mode);
3622 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3623 operands[1]));
3624 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3625 DONE;
3626 }
3627 )
3628
3629 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3630 [(match_operand:<VEL> 0 "register_operand")
3631 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3632 FMAXMINNMV)]
3633 "TARGET_SIMD"
3634 {
3635 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3636 DONE;
3637 }
3638 )
3639
3640 ;; Likewise for integer cases, signed and unsigned.
3641 (define_expand "reduc_<optab>_scal_<mode>"
3642 [(match_operand:<VEL> 0 "register_operand")
3643 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3644 MAXMINV)]
3645 "TARGET_SIMD"
3646 {
3647 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3648 rtx scratch = gen_reg_rtx (<MODE>mode);
3649 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3650 operands[1]));
3651 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3652 DONE;
3653 }
3654 )
3655
3656 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3657 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3658 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3659 MAXMINV))]
3660 "TARGET_SIMD"
3661 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3662 [(set_attr "type" "neon_reduc_minmax<q>")]
3663 )
3664
3665 (define_insn "aarch64_reduc_<optab>_internalv2si"
3666 [(set (match_operand:V2SI 0 "register_operand" "=w")
3667 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3668 MAXMINV))]
3669 "TARGET_SIMD"
3670 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3671 [(set_attr "type" "neon_reduc_minmax")]
3672 )
3673
3674 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3675 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3676 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3677 FMAXMINV))]
3678 "TARGET_SIMD"
3679 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3680 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3681 )
3682
3683 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3684 ;; allocation.
3685 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3686 ;; to select.
3687 ;;
3688 ;; Thus our BSL is of the form:
3689 ;; op0 = bsl (mask, op2, op3)
3690 ;; We can use any of:
3691 ;;
3692 ;;   if (op0 = mask)
3693 ;;     bsl mask, op2, op3
3694 ;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
3695 ;;     bit op0, op2, mask
3696 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
3697 ;;     bif op0, op3, mask
3698 ;;
3699 ;; The aarch64_simd_bsl<mode> expander expands to this pattern.
3700 ;; Some forms of straight-line code may generate the equivalent form
3701 ;; in *aarch64_simd_bsl<mode>_alt.
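;;
;; For reference, the xor/and/xor RTL below implements the usual bitwise
;; select identity.  An illustrative scalar sketch (hypothetical function):
;;
;;   unsigned long long bsel (unsigned long long mask,
;;                            unsigned long long a, unsigned long long b)
;;   {
;;     /* (mask & a) | (~mask & b), rewritten as ((a ^ b) & mask) ^ b.  */
;;     return ((a ^ b) & mask) ^ b;
;;   }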
3702
3703 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3704 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3705 (xor:VDQ_I
3706 (and:VDQ_I
3707 (xor:VDQ_I
3708 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
3709 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
3710 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3711 (match_dup:<V_INT_EQUIV> 3)
3712 ))]
3713 "TARGET_SIMD"
3714 "@
3715 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3716 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3717 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
3718 [(set_attr "type" "neon_bsl<q>")]
3719 )
3720
3721 ;; We need this form in addition to the above pattern to match the case
3722 ;; when combine tries merging three insns such that the second operand of
3723 ;; the outer XOR matches the second operand of the inner XOR rather than
3724 ;; the first. The two are equivalent but since recog doesn't try all
3725 ;; permutations of commutative operations, we have to have a separate pattern.
3726
3727 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3729 (xor:VDQ_I
3730 (and:VDQ_I
3731 (xor:VDQ_I
3732 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
3733 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
3734 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3735 (match_dup:<V_INT_EQUIV> 2)))]
3736 "TARGET_SIMD"
3737 "@
3738 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3739 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3740 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
3741 [(set_attr "type" "neon_bsl<q>")]
3742 )
3743
3744 ;; DImode is special: we want to avoid computing, in the vector registers,
3745 ;; operations that are more naturally computed in general purpose
3746 ;; registers.  If we do that, we need to move all three operands from general
3747 ;; purpose registers to vector registers, then back again. However, we
3748 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3749 ;; optimizations based on the component operations of a BSL.
3750 ;;
3751 ;; That means we need a splitter back to the individual operations, if they
3752 ;; would be better calculated on the integer side.
3753
3754 (define_insn_and_split "aarch64_simd_bsldi_internal"
3755 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3756 (xor:DI
3757 (and:DI
3758 (xor:DI
3759 (match_operand:DI 3 "register_operand" "w,0,w,r")
3760 (match_operand:DI 2 "register_operand" "w,w,0,r"))
3761 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3762 (match_dup:DI 3)
3763 ))]
3764 "TARGET_SIMD"
3765 "@
3766 bsl\\t%0.8b, %2.8b, %3.8b
3767 bit\\t%0.8b, %2.8b, %1.8b
3768 bif\\t%0.8b, %3.8b, %1.8b
3769 #"
3770 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3771 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3772 {
3773 /* Split back to individual operations. If we're before reload, and
3774 able to create a temporary register, do so. If we're after reload,
3775 we've got an early-clobber destination register, so use that.
3776 Otherwise, we can't create pseudos and we can't yet guarantee that
3777 operands[0] is safe to write, so FAIL to split. */
3778
3779 rtx scratch;
3780 if (reload_completed)
3781 scratch = operands[0];
3782 else if (can_create_pseudo_p ())
3783 scratch = gen_reg_rtx (DImode);
3784 else
3785 FAIL;
3786
3787 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3788 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3789 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3790 DONE;
3791 }
3792 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3793 (set_attr "length" "4,4,4,12")]
3794 )
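
;; For reference, the split above emits the select as three scalar
;; instructions (eor/and/eor), avoiding a round trip through the vector
;; registers.  Pseudo-C of the emitted sequence:
;;
;;   scratch = op2 ^ op3;       /* gen_xordi3 */
;;   scratch = scratch & op1;   /* gen_anddi3 */
;;   op0     = scratch ^ op3;   /* gen_xordi3 */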
3795
3796 (define_insn_and_split "aarch64_simd_bsldi_alt"
3797 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3798 (xor:DI
3799 (and:DI
3800 (xor:DI
3801 (match_operand:DI 3 "register_operand" "w,w,0,r")
3802 (match_operand:DI 2 "register_operand" "w,0,w,r"))
3803 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3804 (match_dup:DI 2)
3805 ))]
3806 "TARGET_SIMD"
3807 "@
3808 bsl\\t%0.8b, %3.8b, %2.8b
3809 bit\\t%0.8b, %3.8b, %1.8b
3810 bif\\t%0.8b, %2.8b, %1.8b
3811 #"
3812 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3813 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3814 {
3815 /* Split back to individual operations. If we're before reload, and
3816 able to create a temporary register, do so. If we're after reload,
3817 we've got an early-clobber destination register, so use that.
3818 Otherwise, we can't create pseudos and we can't yet guarantee that
3819 operands[0] is safe to write, so FAIL to split. */
3820
3821 rtx scratch;
3822 if (reload_completed)
3823 scratch = operands[0];
3824 else if (can_create_pseudo_p ())
3825 scratch = gen_reg_rtx (DImode);
3826 else
3827 FAIL;
3828
3829 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3830 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3831 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3832 DONE;
3833 }
3834 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3835 (set_attr "length" "4,4,4,12")]
3836 )
3837
3838 (define_expand "aarch64_simd_bsl<mode>"
3839 [(match_operand:VALLDIF 0 "register_operand")
3840 (match_operand:<V_INT_EQUIV> 1 "register_operand")
3841 (match_operand:VALLDIF 2 "register_operand")
3842 (match_operand:VALLDIF 3 "register_operand")]
3843 "TARGET_SIMD"
3844 {
3845 /* We can't alias operands together if they have different modes. */
3846 rtx tmp = operands[0];
3847 if (FLOAT_MODE_P (<MODE>mode))
3848 {
3849 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3850 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3851 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3852 }
3853 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3854 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3855 operands[1],
3856 operands[2],
3857 operands[3]));
3858 if (tmp != operands[0])
3859 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3860
3861 DONE;
3862 })
3863
3864 (define_expand "vcond_mask_<mode><v_int_equiv>"
3865 [(match_operand:VALLDI 0 "register_operand")
3866 (match_operand:VALLDI 1 "nonmemory_operand")
3867 (match_operand:VALLDI 2 "nonmemory_operand")
3868 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3869 "TARGET_SIMD"
3870 {
3871 /* If we have (a = (P) ? -1 : 0),
3872 then we can simply move the generated mask (the result must be int).  */
3873 if (operands[1] == CONSTM1_RTX (<MODE>mode)
3874 && operands[2] == CONST0_RTX (<MODE>mode))
3875 emit_move_insn (operands[0], operands[3]);
3876 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
3877 else if (operands[1] == CONST0_RTX (<MODE>mode)
3878 && operands[2] == CONSTM1_RTX (<MODE>mode))
3879 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3880 else
3881 {
3882 if (!REG_P (operands[1]))
3883 operands[1] = force_reg (<MODE>mode, operands[1]);
3884 if (!REG_P (operands[2]))
3885 operands[2] = force_reg (<MODE>mode, operands[2]);
3886 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3887 operands[1], operands[2]));
3888 }
3889
3890 DONE;
3891 })
3892
3893 ;; Patterns comparing two vectors to produce a mask.
3894
3895 (define_expand "vec_cmp<mode><mode>"
3896 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3897 (match_operator 1 "comparison_operator"
3898 [(match_operand:VSDQ_I_DI 2 "register_operand")
3899 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3900 "TARGET_SIMD"
3901 {
3902 rtx mask = operands[0];
3903 enum rtx_code code = GET_CODE (operands[1]);
3904
3905 switch (code)
3906 {
3907 case NE:
3908 case LE:
3909 case LT:
3910 case GE:
3911 case GT:
3912 case EQ:
3913 if (operands[3] == CONST0_RTX (<MODE>mode))
3914 break;
3915
3916 /* Fall through. */
3917 default:
3918 if (!REG_P (operands[3]))
3919 operands[3] = force_reg (<MODE>mode, operands[3]);
3920
3921 break;
3922 }
3923
3924 switch (code)
3925 {
3926 case LT:
3927 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3928 break;
3929
3930 case GE:
3931 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3932 break;
3933
3934 case LE:
3935 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3936 break;
3937
3938 case GT:
3939 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3940 break;
3941
3942 case LTU:
3943 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3944 break;
3945
3946 case GEU:
3947 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3948 break;
3949
3950 case LEU:
3951 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3952 break;
3953
3954 case GTU:
3955 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3956 break;
3957
3958 case NE:
3959 /* Handle NE as !EQ. */
3960 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3961 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
3962 break;
3963
3964 case EQ:
3965 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3966 break;
3967
3968 default:
3969 gcc_unreachable ();
3970 }
3971
3972 DONE;
3973 })
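
;; An illustrative sketch (hypothetical function): an elementwise integer
;; comparison producing an all-ones/all-zeros mask goes through the
;; expander above, e.g. cmgt for signed greater-than:
;;
;;   void cmp_mask (int *r, const int *a, const int *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] > b[i]) ? -1 : 0;
;;   }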
3974
3975 (define_expand "vec_cmp<mode><v_int_equiv>"
3976 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3977 (match_operator 1 "comparison_operator"
3978 [(match_operand:VDQF 2 "register_operand")
3979 (match_operand:VDQF 3 "nonmemory_operand")]))]
3980 "TARGET_SIMD"
3981 {
3982 int use_zero_form = 0;
3983 enum rtx_code code = GET_CODE (operands[1]);
3984 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3985
3986 rtx (*comparison) (rtx, rtx, rtx) = NULL;
3987
3988 switch (code)
3989 {
3990 case LE:
3991 case LT:
3992 case GE:
3993 case GT:
3994 case EQ:
3995 if (operands[3] == CONST0_RTX (<MODE>mode))
3996 {
3997 use_zero_form = 1;
3998 break;
3999 }
4000 /* Fall through. */
4001 default:
4002 if (!REG_P (operands[3]))
4003 operands[3] = force_reg (<MODE>mode, operands[3]);
4004
4005 break;
4006 }
4007
4008 switch (code)
4009 {
4010 case LT:
4011 if (use_zero_form)
4012 {
4013 comparison = gen_aarch64_cmlt<mode>;
4014 break;
4015 }
4016 /* Fall through. */
4017 case UNLT:
4018 std::swap (operands[2], operands[3]);
4019 /* Fall through. */
4020 case UNGT:
4021 case GT:
4022 comparison = gen_aarch64_cmgt<mode>;
4023 break;
4024 case LE:
4025 if (use_zero_form)
4026 {
4027 comparison = gen_aarch64_cmle<mode>;
4028 break;
4029 }
4030 /* Fall through. */
4031 case UNLE:
4032 std::swap (operands[2], operands[3]);
4033 /* Fall through. */
4034 case UNGE:
4035 case GE:
4036 comparison = gen_aarch64_cmge<mode>;
4037 break;
4038 case NE:
4039 case EQ:
4040 comparison = gen_aarch64_cmeq<mode>;
4041 break;
4042 case UNEQ:
4043 case ORDERED:
4044 case UNORDERED:
4045 case LTGT:
4046 break;
4047 default:
4048 gcc_unreachable ();
4049 }
4050
4051 switch (code)
4052 {
4053 case UNGE:
4054 case UNGT:
4055 case UNLE:
4056 case UNLT:
4057 {
4058 /* All of the above must not raise any FP exceptions. Thus we first
4059 check each operand for NaNs and force any elements containing NaN to
4060 zero before using them in the compare.
4061 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4062 (cm<cc> (isnan (a) ? 0.0 : a,
4063 isnan (b) ? 0.0 : b))
4064 We use the following transformations for doing the comparisons:
4065 a UNGE b -> a GE b
4066 a UNGT b -> a GT b
4067 a UNLE b -> b GE a
4068 a UNLT b -> b GT a. */
4069
4070 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4071 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4072 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4073 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4074 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4075 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4076 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4077 lowpart_subreg (<V_INT_EQUIV>mode,
4078 operands[2],
4079 <MODE>mode)));
4080 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4081 lowpart_subreg (<V_INT_EQUIV>mode,
4082 operands[3],
4083 <MODE>mode)));
4084 gcc_assert (comparison != NULL);
4085 emit_insn (comparison (operands[0],
4086 lowpart_subreg (<MODE>mode,
4087 tmp0, <V_INT_EQUIV>mode),
4088 lowpart_subreg (<MODE>mode,
4089 tmp1, <V_INT_EQUIV>mode)));
4090 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4091 }
4092 break;
4093
4094 case LT:
4095 case LE:
4096 case GT:
4097 case GE:
4098 case EQ:
4099 case NE:
4100 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4101 As a LT b <=> b GT a and a LE b <=> b GE a.  Our transformations are:
4102 a GE b -> a GE b
4103 a GT b -> a GT b
4104 a LE b -> b GE a
4105 a LT b -> b GT a
4106 a EQ b -> a EQ b
4107 a NE b -> ~(a EQ b) */
4108 gcc_assert (comparison != NULL);
4109 emit_insn (comparison (operands[0], operands[2], operands[3]));
4110 if (code == NE)
4111 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4112 break;
4113
4114 case LTGT:
4115 /* LTGT is not guaranteed not to generate an FP exception.  So let's
4116 go the faster way: ((a > b) || (b > a)).  */
4117 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4118 operands[2], operands[3]));
4119 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4120 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4121 break;
4122
4123 case ORDERED:
4124 case UNORDERED:
4125 case UNEQ:
4126 /* cmeq (a, a) & cmeq (b, b). */
4127 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4128 operands[2], operands[2]));
4129 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4130 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4131
4132 if (code == UNORDERED)
4133 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4134 else if (code == UNEQ)
4135 {
4136 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4137 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
4138 }
4139 break;
4140
4141 default:
4142 gcc_unreachable ();
4143 }
4144
4145 DONE;
4146 })
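
;; An illustrative sketch (hypothetical function): for LT against a
;; non-zero operand the expander above swaps the operands and emits
;; fcmgt, following the "a LT b -> b GT a" transformation:
;;
;;   void fcmp_mask (unsigned *r, const float *a, const float *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] < b[i]) ? -1u : 0u;
;;   }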
4147
4148 (define_expand "vec_cmpu<mode><mode>"
4149 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4150 (match_operator 1 "comparison_operator"
4151 [(match_operand:VSDQ_I_DI 2 "register_operand")
4152 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4153 "TARGET_SIMD"
4154 {
4155 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4156 operands[2], operands[3]));
4157 DONE;
4158 })
4159
4160 (define_expand "vcond<mode><mode>"
4161 [(set (match_operand:VALLDI 0 "register_operand")
4162 (if_then_else:VALLDI
4163 (match_operator 3 "comparison_operator"
4164 [(match_operand:VALLDI 4 "register_operand")
4165 (match_operand:VALLDI 5 "nonmemory_operand")])
4166 (match_operand:VALLDI 1 "nonmemory_operand")
4167 (match_operand:VALLDI 2 "nonmemory_operand")))]
4168 "TARGET_SIMD"
4169 {
4170 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4171 enum rtx_code code = GET_CODE (operands[3]);
4172
4173 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4174 to EQ and swap operands 1 and 2 in order to avoid the additional
4175 NOT instruction.  */
4176 if (code == NE)
4177 {
4178 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4179 operands[4], operands[5]);
4180 std::swap (operands[1], operands[2]);
4181 }
4182 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4183 operands[4], operands[5]));
4184 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4185 operands[2], mask));
4186
4187 DONE;
4188 })
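
;; An illustrative sketch (hypothetical function) of the NE handling
;; above: the select below can become cmeq + bsl with the select arms
;; swapped, rather than cmeq + not + bsl:
;;
;;   void sel (int *r, const int *a, const int *b,
;;             const int *x, const int *y, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] != b[i]) ? x[i] : y[i];
;;   }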
4189
4190 (define_expand "vcond<v_cmp_mixed><mode>"
4191 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4192 (if_then_else:<V_cmp_mixed>
4193 (match_operator 3 "comparison_operator"
4194 [(match_operand:VDQF_COND 4 "register_operand")
4195 (match_operand:VDQF_COND 5 "nonmemory_operand")])
4196 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4197 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4198 "TARGET_SIMD"
4199 {
4200 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4201 enum rtx_code code = GET_CODE (operands[3]);
4202
4203 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4204 to EQ and swap operands 1 and 2 in order to avoid the additional
4205 NOT instruction.  */
4206 if (code == NE)
4207 {
4208 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4209 operands[4], operands[5]);
4210 std::swap (operands[1], operands[2]);
4211 }
4212 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4213 operands[4], operands[5]));
4214 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4215 operands[0], operands[1],
4216 operands[2], mask));
4217
4218 DONE;
4219 })
4220
4221 (define_expand "vcondu<mode><mode>"
4222 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4223 (if_then_else:VSDQ_I_DI
4224 (match_operator 3 "comparison_operator"
4225 [(match_operand:VSDQ_I_DI 4 "register_operand")
4226 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4227 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4228 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4229 "TARGET_SIMD"
4230 {
4231 rtx mask = gen_reg_rtx (<MODE>mode);
4232 enum rtx_code code = GET_CODE (operands[3]);
4233
4234 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4235 to EQ and swap operands 1 and 2 in order to avoid the additional
4236 NOT instruction.  */
4237 if (code == NE)
4238 {
4239 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4240 operands[4], operands[5]);
4241 std::swap (operands[1], operands[2]);
4242 }
4243 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4244 operands[4], operands[5]));
4245 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4246 operands[2], mask));
4247 DONE;
4248 })
4249
4250 (define_expand "vcondu<mode><v_cmp_mixed>"
4251 [(set (match_operand:VDQF 0 "register_operand")
4252 (if_then_else:VDQF
4253 (match_operator 3 "comparison_operator"
4254 [(match_operand:<V_cmp_mixed> 4 "register_operand")
4255 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4256 (match_operand:VDQF 1 "nonmemory_operand")
4257 (match_operand:VDQF 2 "nonmemory_operand")))]
4258 "TARGET_SIMD"
4259 {
4260 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4261 enum rtx_code code = GET_CODE (operands[3]);
4262
4263 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4264 to EQ and swap operands 1 and 2 in order to avoid the additional
4265 NOT instruction.  */
4266 if (code == NE)
4267 {
4268 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4269 operands[4], operands[5]);
4270 std::swap (operands[1], operands[2]);
4271 }
4272 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4273 mask, operands[3],
4274 operands[4], operands[5]));
4275 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4276 operands[2], mask));
4277 DONE;
4278 })
4279
4280 ;; Patterns for AArch64 SIMD Intrinsics.
4281
4282 ;; Lane extraction with sign extension to general purpose register.
4283 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4284 [(set (match_operand:GPI 0 "register_operand" "=r")
4285 (sign_extend:GPI
4286 (vec_select:<VDQQH:VEL>
4287 (match_operand:VDQQH 1 "register_operand" "w")
4288 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4289 "TARGET_SIMD"
4290 {
4291 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4292 INTVAL (operands[2]));
4293 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4294 }
4295 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4296 )
4297
4298 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4299 [(set (match_operand:GPI 0 "register_operand" "=r")
4300 (zero_extend:GPI
4301 (vec_select:<VDQQH:VEL>
4302 (match_operand:VDQQH 1 "register_operand" "w")
4303 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4304 "TARGET_SIMD"
4305 {
4306 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4307 INTVAL (operands[2]));
4308 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4309 }
4310 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4311 )
4312
4313 ;; Lane extraction of a value; neither sign nor zero extension
4314 ;; is guaranteed, so the upper bits should be considered undefined.
4315 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4316 ;; Extracting lane zero is split into a simple move when it is between SIMD
4317 ;; registers or a store.
4318 (define_insn_and_split "aarch64_get_lane<mode>"
4319 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4320 (vec_select:<VEL>
4321 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4322 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4323 "TARGET_SIMD"
4324 {
4325 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4326 switch (which_alternative)
4327 {
4328 case 0:
4329 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4330 case 1:
4331 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4332 case 2:
4333 return "st1\\t{%1.<Vetype>}[%2], %0";
4334 default:
4335 gcc_unreachable ();
4336 }
4337 }
4338 "&& reload_completed
4339 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4340 [(set (match_dup 0) (match_dup 1))]
4341 {
4342 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4343 }
4344 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4345 )
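
;; An illustrative sketch (hypothetical functions): subscripting a GCC
;; vector extension value extracts a lane through the pattern above, and
;; lane zero splits into a plain move after reload:
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   int lane1 (v4si v) { return v[1]; }  /* e.g. umov w0, v0.s[1] */
;;   int lane0 (v4si v) { return v[0]; }  /* e.g. a simple fmov w0, s0 */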
4346
4347 (define_insn "*aarch64_get_high<mode>"
4348 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4349 (vec_select:<VEL>
4350 (match_operand:VQ_2E 1 "register_operand" "w")
4351 (parallel [(match_operand:SI 2 "immediate_operand")])))]
4352 "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4353 "fmov\t%0, %1.d[1]"
4354 [(set_attr "type" "f_mrc")]
4355 )
4356
4357 (define_insn "load_pair_lanes<mode>"
4358 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4359 (vec_concat:<VDBL>
4360 (match_operand:VDCSIF 1 "memory_operand" "Utq")
4361 (match_operand:VDCSIF 2 "memory_operand" "m")))]
4362 "TARGET_FLOAT
4363 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4364 "ldr\\t%<single_dtype>0, %1"
4365 [(set_attr "type" "neon_load1_1reg<dblq>")]
4366 )
4367
4368 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4369 ;; below. The reason for having both of them is that the alternatives of
4370 ;; the later patterns do not have consistent register preferences: the STP
4371 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4372 ;; the GPR form is more natural for scalar integers) whereas the other
4373 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4374 ;;
4375 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4376 ;; which the destination was always memory. On the other hand, expressing
4377 ;; the true preferences makes GPRs seem more palatable than they really are
4378 ;; for register destinations.
4379 ;;
4380 ;; Despite that, we do still want the general form to have STP alternatives,
4381 ;; in order to handle cases where a register destination is spilled.
4382 ;;
4383 ;; The best compromise therefore seemed to be to have a dedicated STP
4384 ;; pattern to catch cases in which the destination was always memory.
4385 ;; This dedicated pattern must come first.
4386
4387 (define_insn "store_pair_lanes<mode>"
4388 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
4389 (vec_concat:<VDBL>
4390 (match_operand:VDCSIF 1 "register_operand" "w, r")
4391 (match_operand:VDCSIF 2 "register_operand" "w, r")))]
4392 "TARGET_FLOAT"
4393 "@
4394 stp\t%<single_type>1, %<single_type>2, %y0
4395 stp\t%<single_wx>1, %<single_wx>2, %y0"
4396 [(set_attr "type" "neon_stp, store_16")]
4397 )
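
;; An illustrative sketch (hypothetical function, and only one of several
;; routes to STP): two adjacent FP stores may end up as a single
;; stp d0, d1, [x0], whether via this pattern or separate pair fusion:
;;
;;   void store_pair (double *p, double a, double b)
;;   {
;;     p[0] = a;
;;     p[1] = b;
;;   }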
4398
4399 ;; Form a vector whose least significant half comes from operand 1 and whose
4400 ;; most significant half comes from operand 2. The register alternatives
4401 ;; tie the least significant half to the same register as the destination,
4402 ;; so that only the other half needs to be handled explicitly. For the
4403 ;; reasons given above, the STP alternatives use ? for constraints that
4404 ;; the register alternatives either don't accept or themselves disparage.
4405
4406 (define_insn "*aarch64_combine_internal<mode>"
4407 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
4408 (vec_concat:<VDBL>
4409 (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")
4410 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))]
4411 "TARGET_FLOAT
4412 && !BYTES_BIG_ENDIAN
4413 && (register_operand (operands[0], <VDBL>mode)
4414 || register_operand (operands[2], <MODE>mode))"
4415 "@
4416 ins\t%0.<single_type>[1], %2.<single_type>[0]
4417 ins\t%0.<single_type>[1], %<single_wx>2
4418 fmov\t%0.d[1], %2
4419 ld1\t{%0.<single_type>}[1], %2
4420 stp\t%<single_type>1, %<single_type>2, %y0
4421 stp\t%<single_wx>1, %<single_wx>2, %y0"
4422 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr,
4423 neon_load1_one_lane<dblq>, neon_stp, store_16")
4424 (set_attr "arch" "simd,simd,*,simd,*,*")]
4425 )
4426
4427 (define_insn "*aarch64_combine_internal_be<mode>"
4428 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
4429 (vec_concat:<VDBL>
4430 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r")
4431 (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")))]
4432 "TARGET_FLOAT
4433 && BYTES_BIG_ENDIAN
4434 && (register_operand (operands[0], <VDBL>mode)
4435 || register_operand (operands[2], <MODE>mode))"
4436 "@
4437 ins\t%0.<single_type>[1], %2.<single_type>[0]
4438 ins\t%0.<single_type>[1], %<single_wx>2
4439 fmov\t%0.d[1], %2
4440 ld1\t{%0.<single_type>}[1], %2
4441 stp\t%<single_type>2, %<single_type>1, %y0
4442 stp\t%<single_wx>2, %<single_wx>1, %y0"
4443 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr, neon_load1_one_lane<dblq>, neon_stp, store_16")
4444 (set_attr "arch" "simd,simd,*,simd,*,*")]
4445 )
4446
4447 ;; In this insn, operand 1 supplies the low half and operand 2 the high half
4448 ;; of the destination vector.
4449
4450 (define_insn "*aarch64_combinez<mode>"
4451 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4452 (vec_concat:<VDBL>
4453 (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
4454 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4455 "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4456 "@
4457 fmov\\t%<single_type>0, %<single_type>1
4458 fmov\t%<single_type>0, %<single_wx>1
4459 ldr\\t%<single_type>0, %1"
4460 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
4461 )
4462
4463 (define_insn "*aarch64_combinez_be<mode>"
4464 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4465 (vec_concat:<VDBL>
4466 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4467 (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
4468 "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4469 "@
4470 fmov\\t%<single_type>0, %<single_type>1
4471 fmov\t%<single_type>0, %<single_wx>1
4472 ldr\\t%<single_type>0, %1"
4473 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
4474 )
4475
4476 ;; Form a vector whose first half (in array order) comes from operand 1
4477 ;; and whose second half (in array order) comes from operand 2.
4478 ;; This operand order follows the RTL vec_concat operation.
4479 (define_expand "@aarch64_vec_concat<mode>"
4480 [(set (match_operand:<VDBL> 0 "register_operand")
4481 (vec_concat:<VDBL>
4482 (match_operand:VDCSIF 1 "general_operand")
4483 (match_operand:VDCSIF 2 "general_operand")))]
4484 "TARGET_FLOAT"
4485 {
4486 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4487 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4488
4489 if (MEM_P (operands[1])
4490 && MEM_P (operands[2])
4491 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4492 /* Use load_pair_lanes<mode>. */
4493 ;
4494 else if (operands[hi] == CONST0_RTX (<MODE>mode))
4495 {
4496 /* Use *aarch64_combinez<mode>. */
4497 if (!nonimmediate_operand (operands[lo], <MODE>mode))
4498 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4499 }
4500 else
4501 {
4502 /* Use *aarch64_combine_internal<mode>. */
4503 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4504 if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4505 {
4506 if (MEM_P (operands[hi]))
4507 {
4508 rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4509 operands[hi] = replace_equiv_address (operands[hi], addr);
4510 }
4511 else
4512 operands[hi] = force_reg (<MODE>mode, operands[hi]);
4513 }
4514 }
4515 })
4516
4517 ;; Form a vector whose least significant half comes from operand 1 and whose
4518 ;; most significant half comes from operand 2. This operand order follows
4519 ;; arm_neon.h vcombine* intrinsics.
4520 (define_expand "aarch64_combine<mode>"
4521 [(match_operand:<VDBL> 0 "register_operand")
4522 (match_operand:VDC 1 "general_operand")
4523 (match_operand:VDC 2 "general_operand")]
4524 "TARGET_FLOAT"
4525 {
4526 if (BYTES_BIG_ENDIAN)
4527 std::swap (operands[1], operands[2]);
4528 emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4529 operands[2]));
4530 DONE;
4531 }
4532 )
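
;; An illustrative sketch: the arm_neon.h vcombine* intrinsics map onto
;; the expander above, which takes its operands in low-then-high order
;; and performs the endianness swap itself:
;;
;;   #include <arm_neon.h>
;;   int32x4_t combine (int32x2_t lo, int32x2_t hi)
;;   {
;;     return vcombine_s32 (lo, hi);
;;   }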
4533
4534 ;; <su><addsub>l<q>.
4535
4536 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4537 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4538 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4539 (match_operand:VQW 1 "register_operand" "w")
4540 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4541 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4542 (match_operand:VQW 2 "register_operand" "w")
4543 (match_dup 3)))))]
4544 "TARGET_SIMD"
4545 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4546 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4547 )
4548
4549 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4550 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4551 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4552 (match_operand:VQW 1 "register_operand" "w")
4553 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4554 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4555 (match_operand:VQW 2 "register_operand" "w")
4556 (match_dup 3)))))]
4557 "TARGET_SIMD"
4558 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4559 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4560 )
4561
4562 (define_expand "vec_widen_<su>add_lo_<mode>"
4563 [(match_operand:<VWIDE> 0 "register_operand")
4564 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4565 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4566 "TARGET_SIMD"
4567 {
4568 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4569 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4570 operands[2], p));
4571 DONE;
4572 })
4573
4574 (define_expand "vec_widen_<su>add_hi_<mode>"
4575 [(match_operand:<VWIDE> 0 "register_operand")
4576 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4577 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4578 "TARGET_SIMD"
4579 {
4580 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4581 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4582 operands[2], p));
4583 DONE;
4584 })
4585
4586 (define_expand "vec_widen_<su>sub_lo_<mode>"
4587 [(match_operand:<VWIDE> 0 "register_operand")
4588 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4589 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4590 "TARGET_SIMD"
4591 {
4592 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4593 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4594 operands[2], p));
4595 DONE;
4596 })
4597
4598 (define_expand "vec_widen_<su>sub_hi_<mode>"
4599 [(match_operand:<VWIDE> 0 "register_operand")
4600 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4601 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4602 "TARGET_SIMD"
4603 {
4604 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4605 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4606 operands[2], p));
4607 DONE;
4608 })
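
;; An illustrative sketch (hypothetical function): the vectorizer uses the
;; vec_widen_<su>add/sub lo/hi pairs for loops that widen before the
;; arithmetic, e.g. uaddl/uaddl2 for:
;;
;;   void wadd (unsigned short *r, const unsigned char *a,
;;              const unsigned char *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (unsigned short) a[i] + b[i];
;;   }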
4609
4610 (define_expand "aarch64_saddl2<mode>"
4611 [(match_operand:<VWIDE> 0 "register_operand")
4612 (match_operand:VQW 1 "register_operand")
4613 (match_operand:VQW 2 "register_operand")]
4614 "TARGET_SIMD"
4615 {
4616 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4617 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4618 operands[2], p));
4619 DONE;
4620 })
4621
4622 (define_expand "aarch64_uaddl2<mode>"
4623 [(match_operand:<VWIDE> 0 "register_operand")
4624 (match_operand:VQW 1 "register_operand")
4625 (match_operand:VQW 2 "register_operand")]
4626 "TARGET_SIMD"
4627 {
4628 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4629 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4630 operands[2], p));
4631 DONE;
4632 })
4633
4634 (define_expand "aarch64_ssubl2<mode>"
4635 [(match_operand:<VWIDE> 0 "register_operand")
4636 (match_operand:VQW 1 "register_operand")
4637 (match_operand:VQW 2 "register_operand")]
4638 "TARGET_SIMD"
4639 {
4640 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4641 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4642 operands[2], p));
4643 DONE;
4644 })
4645
4646 (define_expand "aarch64_usubl2<mode>"
4647 [(match_operand:<VWIDE> 0 "register_operand")
4648 (match_operand:VQW 1 "register_operand")
4649 (match_operand:VQW 2 "register_operand")]
4650 "TARGET_SIMD"
4651 {
4652 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4653 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4654 operands[2], p));
4655 DONE;
4656 })
4657
4658 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4659 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4660 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4661 (match_operand:VD_BHSI 1 "register_operand" "w"))
4662 (ANY_EXTEND:<VWIDE>
4663 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4664 "TARGET_SIMD"
4665 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4666 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4667 )
4668
4669 ;; <su><addsub>w<q>.
4670
4671 (define_expand "widen_ssum<mode>3"
4672 [(set (match_operand:<VDBLW> 0 "register_operand")
4673 (plus:<VDBLW> (sign_extend:<VDBLW>
4674 (match_operand:VQW 1 "register_operand"))
4675 (match_operand:<VDBLW> 2 "register_operand")))]
4676 "TARGET_SIMD"
4677 {
4678 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4679 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4680
4681 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4682 operands[1], p));
4683 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4684 DONE;
4685 }
4686 )
4687
4688 (define_expand "widen_ssum<mode>3"
4689 [(set (match_operand:<VWIDE> 0 "register_operand")
4690 (plus:<VWIDE> (sign_extend:<VWIDE>
4691 (match_operand:VD_BHSI 1 "register_operand"))
4692 (match_operand:<VWIDE> 2 "register_operand")))]
4693 "TARGET_SIMD"
4694 {
4695 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4696 DONE;
4697 })
4698
4699 (define_expand "widen_usum<mode>3"
4700 [(set (match_operand:<VDBLW> 0 "register_operand")
4701 (plus:<VDBLW> (zero_extend:<VDBLW>
4702 (match_operand:VQW 1 "register_operand"))
4703 (match_operand:<VDBLW> 2 "register_operand")))]
4704 "TARGET_SIMD"
4705 {
4706 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4707 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4708
4709 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4710 operands[1], p));
4711 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4712 DONE;
4713 }
4714 )
4715
4716 (define_expand "widen_usum<mode>3"
4717 [(set (match_operand:<VWIDE> 0 "register_operand")
4718 (plus:<VWIDE> (zero_extend:<VWIDE>
4719 (match_operand:VD_BHSI 1 "register_operand"))
4720 (match_operand:<VWIDE> 2 "register_operand")))]
4721 "TARGET_SIMD"
4722 {
4723 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4724 DONE;
4725 })
4726
4727 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4728 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4729 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4730 (ANY_EXTEND:<VWIDE>
4731 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4732 "TARGET_SIMD"
4733 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4734 [(set_attr "type" "neon_sub_widen")]
4735 )
4736
4737 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4738 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4739 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4740 (ANY_EXTEND:<VWIDE>
4741 (vec_select:<VHALF>
4742 (match_operand:VQW 2 "register_operand" "w")
4743 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4744 "TARGET_SIMD"
4745 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4746 [(set_attr "type" "neon_sub_widen")]
4747 )
4748
4749 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4750 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4751 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4752 (ANY_EXTEND:<VWIDE>
4753 (vec_select:<VHALF>
4754 (match_operand:VQW 2 "register_operand" "w")
4755 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4756 "TARGET_SIMD"
4757 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4758 [(set_attr "type" "neon_sub_widen")]
4759 )
4760
4761 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4762 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4763 (plus:<VWIDE>
4764 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4765 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4766 "TARGET_SIMD"
4767 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4768 [(set_attr "type" "neon_add_widen")]
4769 )
4770
4771 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4772 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4773 (plus:<VWIDE>
4774 (ANY_EXTEND:<VWIDE>
4775 (vec_select:<VHALF>
4776 (match_operand:VQW 2 "register_operand" "w")
4777 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4778 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4779 "TARGET_SIMD"
4780 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4781 [(set_attr "type" "neon_add_widen")]
4782 )
4783
4784 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4785 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4786 (plus:<VWIDE>
4787 (ANY_EXTEND:<VWIDE>
4788 (vec_select:<VHALF>
4789 (match_operand:VQW 2 "register_operand" "w")
4790 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4791 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4792 "TARGET_SIMD"
4793 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4794 [(set_attr "type" "neon_add_widen")]
4795 )
4796
4797 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4798 [(set (match_operand:<VWIDE> 0 "register_operand")
4799 (ADDSUB:<VWIDE>
4800 (ANY_EXTEND:<VWIDE>
4801 (vec_select:<VHALF>
4802 (match_operand:VQW 2 "register_operand")
4803 (match_dup 3)))
4804 (match_operand:<VWIDE> 1 "register_operand")))]
4805 "TARGET_SIMD"
4806 {
4807 /* We still do an emit_insn rather than relying on the pattern above
4808 because for the MINUS case the operands would need to be swapped
4809 around. */
4810 operands[3]
4811 = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4812 emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4813 operands[0],
4814 operands[1],
4815 operands[2],
4816 operands[3]));
4817 DONE;
4818 })
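
;; The PARALLEL built here is the same high-half lane selector used by the
;; *_internal patterns above; e.g. for V8HI on little-endian it is roughly
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;; so the vec_select picks lanes 4..7 for the "2" (high-half) instructions.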
4819
4820 ;; <su><r>h<addsub>.
4821
4822 (define_expand "<su_optab>avg<mode>3_floor"
4823 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4824 (truncate:VDQ_BHSI
4825 (ashiftrt:<V2XWIDE>
4826 (plus:<V2XWIDE>
4827 (ANY_EXTEND:<V2XWIDE>
4828 (match_operand:VDQ_BHSI 1 "register_operand"))
4829 (ANY_EXTEND:<V2XWIDE>
4830 (match_operand:VDQ_BHSI 2 "register_operand")))
4831 (match_dup 3))))]
4832 "TARGET_SIMD"
4833 {
4834 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4835 }
4836 )
4837
4838 (define_expand "<su_optab>avg<mode>3_ceil"
4839 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4840 (truncate:VDQ_BHSI
4841 (ashiftrt:<V2XWIDE>
4842 (plus:<V2XWIDE>
4843 (plus:<V2XWIDE>
4844 (ANY_EXTEND:<V2XWIDE>
4845 (match_operand:VDQ_BHSI 1 "register_operand"))
4846 (ANY_EXTEND:<V2XWIDE>
4847 (match_operand:VDQ_BHSI 2 "register_operand")))
4848 (match_dup 3))
4849 (match_dup 3))))]
4850 "TARGET_SIMD"
4851 {
4852 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4853 }
4854 )
4855
4856 (define_expand "aarch64_<su>hsub<mode>"
4857 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4858 (truncate:VDQ_BHSI
4859 (ashiftrt:<V2XWIDE>
4860 (minus:<V2XWIDE>
4861 (ANY_EXTEND:<V2XWIDE>
4862 (match_operand:VDQ_BHSI 1 "register_operand"))
4863 (ANY_EXTEND:<V2XWIDE>
4864 (match_operand:VDQ_BHSI 2 "register_operand")))
4865 (match_dup 3))))]
4866 "TARGET_SIMD"
4867 {
4868 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4869 }
4870 )
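
;; Written out element by element, these expansions are, roughly:
;;   avg_floor:  res[i] = (a[i] + b[i]) >> 1;        // <su>hadd
;;   avg_ceil:   res[i] = (a[i] + b[i] + 1) >> 1;    // <su>rhadd
;;   hsub:       res[i] = (a[i] - b[i]) >> 1;        // <su>hsub
;; with the arithmetic done in double-width elements so that no
;; intermediate result can wrap.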
4871
4872 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4873 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4874 (truncate:VDQ_BHSI
4875 (ashiftrt:<V2XWIDE>
4876 (ADDSUB:<V2XWIDE>
4877 (ANY_EXTEND:<V2XWIDE>
4878 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4879 (ANY_EXTEND:<V2XWIDE>
4880 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4881 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4882 "TARGET_SIMD"
4883 "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4884 [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4885 )
4886
4887 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4888 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4889 (truncate:VDQ_BHSI
4890 (ashiftrt:<V2XWIDE>
4891 (plus:<V2XWIDE>
4892 (plus:<V2XWIDE>
4893 (ANY_EXTEND:<V2XWIDE>
4894 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4895 (ANY_EXTEND:<V2XWIDE>
4896 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4897 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4898 (match_dup 3))))]
4899 "TARGET_SIMD"
4900 "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4901 [(set_attr "type" "neon_add_halve<q>")]
4902 )
4903
4904 ;; <r><addsub>hn<q>.
4905
4906 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4907 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4908 (truncate:<VNARROWQ>
4909 (ashiftrt:VQN
4910 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4911 (match_operand:VQN 2 "register_operand" "w"))
4912 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4913 "TARGET_SIMD"
4914 "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4915 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4916 )
4917
4918 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4919 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4920 (truncate:<VNARROWQ>
4921 (ashiftrt:VQN
4922 (plus:VQN
4923 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4924 (match_operand:VQN 2 "register_operand" "w"))
4925 (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4926 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4927 "TARGET_SIMD"
4928 "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4929 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4930 )
4931
4932 (define_expand "aarch64_<optab>hn<mode>"
4933 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4934 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4935 (match_operand:VQN 2 "register_operand")))]
4936 "TARGET_SIMD"
4937 {
4938 rtx shft
4939 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4940 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4941 emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4942 operands[2], shft));
4943 DONE;
4944 }
4945 )
4946
4947 (define_expand "aarch64_r<optab>hn<mode>"
4948 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4949 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4950 (match_operand:VQN 2 "register_operand")))]
4951 "TARGET_SIMD"
4952 {
4953 rtx shft
4954 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4955 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4956 rtx rnd
4957 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4958 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4959 emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4960 operands[2], rnd, shft));
4961 DONE;
4962 }
4963 )
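
;; For example, for V8HI (16-bit elements) the constants built above are
;; shft = 8 and rnd = 1 << 7, so, element by element and roughly:
;;   addhn:  res[i] = (uint8_t) ((a[i] + b[i]) >> 8);
;;   raddhn: res[i] = (uint8_t) ((a[i] + b[i] + (1 << 7)) >> 8);
;; i.e. the (rounded) high half of each sum, narrowed.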
4964
4965 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4966 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4967 (vec_concat:<VNARROWQ2>
4968 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4969 (truncate:<VNARROWQ>
4970 (ashiftrt:VQN
4971 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4972 (match_operand:VQN 3 "register_operand" "w"))
4973 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4974 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4975 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4976 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4977 )
4978
4979 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4980 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4981 (vec_concat:<VNARROWQ2>
4982 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4983 (truncate:<VNARROWQ>
4984 (ashiftrt:VQN
4985 (plus:VQN
4986 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4987 (match_operand:VQN 3 "register_operand" "w"))
4988 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4989 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4990 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4991 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4992 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4993 )
4994
4995 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4996 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4997 (vec_concat:<VNARROWQ2>
4998 (truncate:<VNARROWQ>
4999 (ashiftrt:VQN
5000 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
5001 (match_operand:VQN 3 "register_operand" "w"))
5002 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
5003 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5004 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5005 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
5006 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
5007 )
5008
5009 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
5010 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5011 (vec_concat:<VNARROWQ2>
5012 (truncate:<VNARROWQ>
5013 (ashiftrt:VQN
5014 (plus:VQN
5015 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
5016 (match_operand:VQN 3 "register_operand" "w"))
5017 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
5018 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
5019 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5020 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5021 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
5022 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
5023 )
5024
5025 (define_expand "aarch64_<optab>hn2<mode>"
5026 [(match_operand:<VNARROWQ2> 0 "register_operand")
5027 (match_operand:<VNARROWQ> 1 "register_operand")
5028 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5029 (match_operand:VQN 3 "register_operand"))]
5030 "TARGET_SIMD"
5031 {
5032 rtx shft
5033 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5034 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5035 if (BYTES_BIG_ENDIAN)
5036 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
5037 operands[1], operands[2], operands[3], shft));
5038 else
5039 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
5040 operands[1], operands[2], operands[3], shft));
5041 DONE;
5042 }
5043 )
5044
5045 (define_expand "aarch64_r<optab>hn2<mode>"
5046 [(match_operand:<VNARROWQ2> 0 "register_operand")
5047 (match_operand:<VNARROWQ> 1 "register_operand")
5048 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5049 (match_operand:VQN 3 "register_operand"))]
5050 "TARGET_SIMD"
5051 {
5052 rtx shft
5053 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5054 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5055 rtx rnd
5056 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5057 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
5058 if (BYTES_BIG_ENDIAN)
5059 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
5060 operands[1], operands[2], operands[3], rnd, shft));
5061 else
5062 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5063 operands[1], operands[2], operands[3], rnd, shft));
5064 DONE;
5065 }
5066 )
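
;; The _le/_be pairs above differ only in the operand order of the
;; vec_concat: RTL element order follows memory order, so on big-endian
;; the two register halves appear swapped.  Both forms implement, roughly,
;;   res = { prev_narrow_half, narrow ((a + b [+ rnd]) >> shft) }
;; with operand 1 tied to the destination so the low half is preserved.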
5067
5068 ;; Optimize ((a + b) >> n) + c where n is half the bitsize of the vector element
5069 (define_insn_and_split "*bitmask_shift_plus<mode>"
5070 [(set (match_operand:VQN 0 "register_operand" "=&w")
5071 (plus:VQN
5072 (lshiftrt:VQN
5073 (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5074 (match_operand:VQN 2 "register_operand" "w"))
5075 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5076 (match_operand:VQN 4 "register_operand" "w")))]
5077 "TARGET_SIMD"
5078 "#"
5079 "&& true"
5080 [(const_int 0)]
5081 {
5082 rtx tmp;
5083 if (can_create_pseudo_p ())
5084 tmp = gen_reg_rtx (<VNARROWQ>mode);
5085 else
5086 tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5087 emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5088 emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
5089 DONE;
5090 })
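
;; The split above rewrites ((a + b) >> shft) + c, where the shift extracts
;; the high halves exactly, as roughly:
;;   t = addhn (a, b);        // narrow high halves of the wrapping sums
;;   r = uaddw (c, t);        // zero-extend and accumulate into c
;; one instruction shorter than add + shift + add; the zero extension
;; matches what the original lshiftrt guarantees for each element.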
5091
5092 ;; pmul.
5093
5094 (define_insn "aarch64_pmul<mode>"
5095 [(set (match_operand:VB 0 "register_operand" "=w")
5096 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5097 (match_operand:VB 2 "register_operand" "w")]
5098 UNSPEC_PMUL))]
5099 "TARGET_SIMD"
5100 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5101 [(set_attr "type" "neon_mul_<Vetype><q>")]
5102 )
5103
5104 (define_insn "aarch64_pmullv8qi"
5105 [(set (match_operand:V8HI 0 "register_operand" "=w")
5106 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5107 (match_operand:V8QI 2 "register_operand" "w")]
5108 UNSPEC_PMULL))]
5109 "TARGET_SIMD"
5110 "pmull\\t%0.8h, %1.8b, %2.8b"
5111 [(set_attr "type" "neon_mul_b_long")]
5112 )
5113
5114 (define_insn "aarch64_pmull_hiv16qi_insn"
5115 [(set (match_operand:V8HI 0 "register_operand" "=w")
5116 (unspec:V8HI
5117 [(vec_select:V8QI
5118 (match_operand:V16QI 1 "register_operand" "w")
5119 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5120 (vec_select:V8QI
5121 (match_operand:V16QI 2 "register_operand" "w")
5122 (match_dup 3))]
5123 UNSPEC_PMULL))]
5124 "TARGET_SIMD"
5125 "pmull2\\t%0.8h, %1.16b, %2.16b"
5126 [(set_attr "type" "neon_mul_b_long")]
5127 )
5128
5129 (define_expand "aarch64_pmull_hiv16qi"
5130 [(match_operand:V8HI 0 "register_operand")
5131 (match_operand:V16QI 1 "register_operand")
5132 (match_operand:V16QI 2 "register_operand")]
5133 "TARGET_SIMD"
5134 {
5135 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5136 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5137 operands[2], p));
5138 DONE;
5139 }
5140 )
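
;; PMUL/PMULL are carry-less (polynomial, GF(2)) multiplies: partial
;; products are combined with XOR instead of ADD.  E.g. for bytes, roughly:
;;   res = 0; for (i = 0; i < 8; i++) if ((b >> i) & 1) res ^= a << i;
;; PMUL keeps the low 8 bits of that, PMULL the full 16-bit product, and
;; PMULL2 does the same on the high halves of the 128-bit inputs.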
5141
5142 ;; fmulx.
5143
5144 (define_insn "aarch64_fmulx<mode>"
5145 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5146 (unspec:VHSDF_HSDF
5147 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5148 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5149 UNSPEC_FMULX))]
5150 "TARGET_SIMD"
5151 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5152 [(set_attr "type" "neon_fp_mul_<stype>")]
5153 )
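
;; FMULX behaves like FMUL except that (+/-)0.0 * (+/-)Inf returns
;; (+/-)2.0 (sign from the XOR of the operand signs) rather than NaN;
;; roughly, per element:
;;   res = (iszero (a) && isinf (b)) || (isinf (a) && iszero (b))
;;         ? (signbit (a) != signbit (b) ? -2.0 : 2.0)
;;         : a * b;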
5154
5155 ;; vmulxq_lane_f32 and vmulx_laneq_f32
5156
5157 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5158 [(set (match_operand:VDQSF 0 "register_operand" "=w")
5159 (unspec:VDQSF
5160 [(match_operand:VDQSF 1 "register_operand" "w")
5161 (vec_duplicate:VDQSF
5162 (vec_select:<VEL>
5163 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5164 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5165 UNSPEC_FMULX))]
5166 "TARGET_SIMD"
5167 {
5168 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5169 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5170 }
5171 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5172 )
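
;; aarch64_endian_lane_rtx flips the lane number on big-endian so that the
;; architectural lane printed in the asm matches the RTL lane selected
;; above; e.g. RTL lane 1 of a V4SF becomes architectural lane 2.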
5173
5174 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5175
5176 (define_insn "*aarch64_mulx_elt<mode>"
5177 [(set (match_operand:VDQF 0 "register_operand" "=w")
5178 (unspec:VDQF
5179 [(match_operand:VDQF 1 "register_operand" "w")
5180 (vec_duplicate:VDQF
5181 (vec_select:<VEL>
5182 (match_operand:VDQF 2 "register_operand" "w")
5183 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5184 UNSPEC_FMULX))]
5185 "TARGET_SIMD"
5186 {
5187 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5188 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5189 }
5190 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5191 )
5192
5193 ;; vmulxq_lane
5194
5195 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5196 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5197 (unspec:VHSDF
5198 [(match_operand:VHSDF 1 "register_operand" "w")
5199 (vec_duplicate:VHSDF
5200 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5201 UNSPEC_FMULX))]
5202 "TARGET_SIMD"
5203 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5204 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5205 )
5206
5207 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5208 ;; vmulxd_lane_f64 == vmulx_lane_f64
5209 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5210
5211 (define_insn "*aarch64_vgetfmulx<mode>"
5212 [(set (match_operand:<VEL> 0 "register_operand" "=w")
5213 (unspec:<VEL>
5214 [(match_operand:<VEL> 1 "register_operand" "w")
5215 (vec_select:<VEL>
5216 (match_operand:VDQF 2 "register_operand" "w")
5217 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5218 UNSPEC_FMULX))]
5219 "TARGET_SIMD"
5220 {
5221 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5222 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5223 }
5224 [(set_attr "type" "fmul<Vetype>")]
5225 )

5226 ;; <su>q<addsub>
5227
5228 (define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
5229 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5230 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5231 (match_operand:VSDQ_I 2 "register_operand" "w")))]
5232 "TARGET_SIMD"
5233 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5234 [(set_attr "type" "neon_q<addsub><q>")]
5235 )
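
;; These are the saturating forms: the result is clamped to the type's
;; range instead of wrapping.  E.g. for signed bytes, roughly:
;;   res[i] = clamp (a[i] + b[i], -128, 127);   // sqadd
;; and clamp (a[i] + b[i], 0, 255) for uqadd, with the analogous
;; clamping for sqsub/uqsub.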
5236
5237 ;; suqadd and usqadd
5238
5239 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5240 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5241 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5242 (match_operand:VSDQ_I 2 "register_operand" "w")]
5243 USSUQADD))]
5244 "TARGET_SIMD"
5245 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5246 [(set_attr "type" "neon_qadd<q>")]
5247 )
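
;; These mix signedness: suqadd adds an unsigned value into a signed
;; accumulator with signed saturation, usqadd a signed value into an
;; unsigned accumulator with unsigned saturation; roughly, for bytes:
;;   suqadd: acc[i] = clamp ((int) acc[i] + (unsigned) x[i], -128, 127);
;;   usqadd: acc[i] = clamp ((unsigned) acc[i] + (int) x[i], 0, 255);
;; hence the unspec and the tied "0" constraint on operand 1.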
5248
5249 ;; sqmovn and uqmovn
5250
5251 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5252 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5253 (SAT_TRUNC:<VNARROWQ>
5254 (match_operand:SD_HSDI 1 "register_operand" "w")))]
5255 "TARGET_SIMD"
5256 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5257 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5258 )
5259
5260 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5261 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5262 (SAT_TRUNC:<VNARROWQ>
5263 (match_operand:VQN 1 "register_operand" "w")))]
5264 "TARGET_SIMD"
5265 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5266 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5267 )
5268
5269 (define_insn "aarch64_<su>qxtn2<mode>_le"
5270 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5271 (vec_concat:<VNARROWQ2>
5272 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5273 (SAT_TRUNC:<VNARROWQ>
5274 (match_operand:VQN 2 "register_operand" "w"))))]
5275 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5276 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5277 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5278 )
5279
5280 (define_insn "aarch64_<su>qxtn2<mode>_be"
5281 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5282 (vec_concat:<VNARROWQ2>
5283 (SAT_TRUNC:<VNARROWQ>
5284 (match_operand:VQN 2 "register_operand" "w"))
5285 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5286 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5287 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5288 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5289 )
5290
5291 (define_expand "aarch64_<su>qxtn2<mode>"
5292 [(match_operand:<VNARROWQ2> 0 "register_operand")
5293 (match_operand:<VNARROWQ> 1 "register_operand")
5294 (SAT_TRUNC:<VNARROWQ>
5295 (match_operand:VQN 2 "register_operand"))]
5296 "TARGET_SIMD"
5297 {
5298 if (BYTES_BIG_ENDIAN)
5299 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5300 operands[2]));
5301 else
5302 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5303 operands[2]));
5304 DONE;
5305 }
5306 )
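
;; SQXTN/UQXTN narrow each element with saturation rather than plain
;; truncation; e.g. for int32 -> int16, roughly:
;;   res[i] = (int16_t) clamp (a[i], -32768, 32767);
;; The qxtn2 forms write into the high half of the destination while the
;; tied operand 1 preserves the low half.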
5307
5308 ;; sqmovun
5309
5310 (define_insn "aarch64_sqmovun<mode>"
5311 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5312 (truncate:<VNARROWQ>
5313 (smin:SD_HSDI
5314 (smax:SD_HSDI
5315 (match_operand:SD_HSDI 1 "register_operand" "w")
5316 (const_int 0))
5317 (const_int <half_mask>))))]
5318 "TARGET_SIMD"
5319 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5320 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5321 )
5322
5323 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5324 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5325 (truncate:<VNARROWQ>
5326 (smin:VQN
5327 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5328 (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5329 (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5330 "TARGET_SIMD"
5331 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5332 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5333 )
5334
5335 (define_expand "aarch64_sqmovun<mode>"
5336 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5337 (truncate:<VNARROWQ>
5338 (smin:VQN
5339 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5340 (match_dup 2))
5341 (match_dup 3))))]
5342 "TARGET_SIMD"
5343 {
5344 operands[2] = CONST0_RTX (<MODE>mode);
5345 operands[3]
5346 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5347 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5348 }
5349 )
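
;; SQXTUN narrows a *signed* input to the *unsigned* narrow range, which
;; is exactly the smin (smax (x, 0), mask) form matched above; e.g. for
;; int32 -> uint16, roughly:
;;   res[i] = (uint16_t) clamp (a[i], 0, 65535);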
5350
5351 (define_insn "aarch64_sqxtun2<mode>_le"
5352 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5353 (vec_concat:<VNARROWQ2>
5354 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5355 (truncate:<VNARROWQ>
5356 (smin:VQN
5357 (smax:VQN
5358 (match_operand:VQN 2 "register_operand" "w")
5359 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5360 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5361 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5362 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5363 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5364 )
5365
5366 (define_insn "aarch64_sqxtun2<mode>_be"
5367 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5368 (vec_concat:<VNARROWQ2>
5369 (truncate:<VNARROWQ>
5370 (smin:VQN
5371 (smax:VQN
5372 (match_operand:VQN 2 "register_operand" "w")
5373 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5374 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5375 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5376 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5377 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5378 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5379 )
5380
5381 (define_expand "aarch64_sqxtun2<mode>"
5382 [(match_operand:<VNARROWQ2> 0 "register_operand")
5383 (match_operand:<VNARROWQ> 1 "register_operand")
5384 (match_operand:VQN 2 "register_operand")]
5385 "TARGET_SIMD"
5386 {
5387 rtx zeros = CONST0_RTX (<MODE>mode);
5388 rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5389 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5390 if (BYTES_BIG_ENDIAN)
5391 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5392 operands[2], zeros, half_umax));
5393 else
5394 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5395 operands[2], zeros, half_umax));
5396 DONE;
5397 }
5398 )
5399
5400 ;; <su>q<absneg>
5401
5402 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5403 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5404 (UNQOPS:VSDQ_I
5405 (match_operand:VSDQ_I 1 "register_operand" "w")))]
5406 "TARGET_SIMD"
5407 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5408 [(set_attr "type" "neon_<optab><q>")]
5409 )
5410
5411 ;; sq<r>dmulh.
5412
5413 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5414 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5415 (unspec:VSDQ_HSI
5416 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5417 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5418 VQDMULH))]
5419 "TARGET_SIMD"
5420 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5421 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5422 )
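
;; The doubling-multiply-high operations are, element by element and with
;; esize the element width in bits, roughly:
;;   sqdmulh:  res[i] = sat ((2 * a[i] * b[i]) >> esize);
;;   sqrdmulh: res[i] = sat ((2 * a[i] * b[i] + (1 << (esize - 1))) >> esize);
;; only INT_MIN * INT_MIN can actually saturate.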
5423
5424 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5425 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5426 (unspec:VDQHS
5427 [(match_operand:VDQHS 1 "register_operand" "w")
5428 (vec_duplicate:VDQHS
5429 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5430 VQDMULH))]
5431 "TARGET_SIMD"
5432 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5433 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5434 )
5435
5436 ;; sq<r>dmulh_lane
5437
5438 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5439 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5440 (unspec:VDQHS
5441 [(match_operand:VDQHS 1 "register_operand" "w")
5442 (vec_select:<VEL>
5443 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5444 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5445 VQDMULH))]
5446 "TARGET_SIMD"
5447 "*
5448 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5449 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5450 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5451 )
5452
5453 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5454 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5455 (unspec:VDQHS
5456 [(match_operand:VDQHS 1 "register_operand" "w")
5457 (vec_select:<VEL>
5458 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5459 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5460 VQDMULH))]
5461 "TARGET_SIMD"
5462 "*
5463 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5464 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5465 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5466 )
5467
5468 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5469 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5470 (unspec:SD_HSI
5471 [(match_operand:SD_HSI 1 "register_operand" "w")
5472 (vec_select:<VEL>
5473 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5474 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5475 VQDMULH))]
5476 "TARGET_SIMD"
5477 "*
5478 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5479 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5480 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5481 )
5482
5483 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5484 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5485 (unspec:SD_HSI
5486 [(match_operand:SD_HSI 1 "register_operand" "w")
5487 (vec_select:<VEL>
5488 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5489 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5490 VQDMULH))]
5491 "TARGET_SIMD"
5492 "*
5493 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5494 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5495 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5496 )
5497
5498 ;; sqrdml[as]h.
5499
5500 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5501 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5502 (unspec:VSDQ_HSI
5503 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5504 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5505 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5506 SQRDMLH_AS))]
5507 "TARGET_SIMD_RDMA"
5508 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5509 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5510 )
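
;; The Armv8.1-A RDMA forms fuse the accumulation into the rounding
;; doubling multiply-high; informally, per element:
;;   sqrdmlah: acc[i] = sat (((acc[i] << esize) + 2 * a[i] * b[i]
;;                            + (1 << (esize - 1))) >> esize);
;; with sqrdmlsh subtracting the product instead.  The intermediate is
;; wide enough that only the final narrowing saturates.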
5511
5512 ;; sqrdml[as]h_lane.
5513
5514 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5515 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5516 (unspec:VDQHS
5517 [(match_operand:VDQHS 1 "register_operand" "0")
5518 (match_operand:VDQHS 2 "register_operand" "w")
5519 (vec_select:<VEL>
5520 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5521 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5522 SQRDMLH_AS))]
5523 "TARGET_SIMD_RDMA"
5524 {
5525 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5526 return
5527 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5528 }
5529 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5530 )
5531
5532 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5533 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5534 (unspec:SD_HSI
5535 [(match_operand:SD_HSI 1 "register_operand" "0")
5536 (match_operand:SD_HSI 2 "register_operand" "w")
5537 (vec_select:<VEL>
5538 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5539 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5540 SQRDMLH_AS))]
5541 "TARGET_SIMD_RDMA"
5542 {
5543 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5544 return
5545 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5546 }
5547 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5548 )
5549
5550 ;; sqrdml[as]h_laneq.
5551
5552 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5553 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5554 (unspec:VDQHS
5555 [(match_operand:VDQHS 1 "register_operand" "0")
5556 (match_operand:VDQHS 2 "register_operand" "w")
5557 (vec_select:<VEL>
5558 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5559 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5560 SQRDMLH_AS))]
5561 "TARGET_SIMD_RDMA"
5562 {
5563 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5564 return
5565 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5566 }
5567 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5568 )
5569
5570 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5571 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5572 (unspec:SD_HSI
5573 [(match_operand:SD_HSI 1 "register_operand" "0")
5574 (match_operand:SD_HSI 2 "register_operand" "w")
5575 (vec_select:<VEL>
5576 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5577 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5578 SQRDMLH_AS))]
5579 "TARGET_SIMD_RDMA"
5580 {
5581 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5582 return
5583 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5584 }
5585 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5586 )
5587
5588 ;; vqdml[sa]l
5589
5590 (define_insn "aarch64_sqdmlal<mode>"
5591 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5592 (ss_plus:<VWIDE>
5593 (ss_ashift:<VWIDE>
5594 (mult:<VWIDE>
5595 (sign_extend:<VWIDE>
5596 (match_operand:VSD_HSI 2 "register_operand" "w"))
5597 (sign_extend:<VWIDE>
5598 (match_operand:VSD_HSI 3 "register_operand" "w")))
5599 (const_int 1))
5600 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5601 "TARGET_SIMD"
5602 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5603 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5604 )
5605
5606 (define_insn "aarch64_sqdmlsl<mode>"
5607 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5608 (ss_minus:<VWIDE>
5609 (match_operand:<VWIDE> 1 "register_operand" "0")
5610 (ss_ashift:<VWIDE>
5611 (mult:<VWIDE>
5612 (sign_extend:<VWIDE>
5613 (match_operand:VSD_HSI 2 "register_operand" "w"))
5614 (sign_extend:<VWIDE>
5615 (match_operand:VSD_HSI 3 "register_operand" "w")))
5616 (const_int 1))))]
5617 "TARGET_SIMD"
5618 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5619 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5620 )
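
;; Note that both the doubling and the accumulation saturate, matching the
;; nested ss_ashift/ss_plus RTL; e.g. for 16-bit elements, roughly:
;;   sqdmlal: acc[i] = ssat32 (acc[i] + ssat32 (2 * (int32_t) a[i] * b[i]));
;; where ssat32 clamps to the int32_t range.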
5621
5622 ;; vqdml[sa]l_lane
5623
5624 (define_insn "aarch64_sqdmlal_lane<mode>"
5625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5626 (ss_plus:<VWIDE>
5627 (ss_ashift:<VWIDE>
5628 (mult:<VWIDE>
5629 (sign_extend:<VWIDE>
5630 (match_operand:VD_HSI 2 "register_operand" "w"))
5631 (vec_duplicate:<VWIDE>
5632 (sign_extend:<VWIDE_S>
5633 (vec_select:<VEL>
5634 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5635 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5636 ))
5637 (const_int 1))
5638 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5639 "TARGET_SIMD"
5640 {
5641 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5642 return
5643 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5644 }
5645 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5646 )
5647
5648 (define_insn "aarch64_sqdmlsl_lane<mode>"
5649 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5650 (ss_minus:<VWIDE>
5651 (match_operand:<VWIDE> 1 "register_operand" "0")
5652 (ss_ashift:<VWIDE>
5653 (mult:<VWIDE>
5654 (sign_extend:<VWIDE>
5655 (match_operand:VD_HSI 2 "register_operand" "w"))
5656 (vec_duplicate:<VWIDE>
5657 (sign_extend:<VWIDE_S>
5658 (vec_select:<VEL>
5659 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5660 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5661 ))
5662 (const_int 1))))]
5663 "TARGET_SIMD"
5664 {
5665 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5666 return
5667 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5668 }
5669 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5670 )
5671
5672
5673 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5674 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5675 (ss_minus:<VWIDE>
5676 (match_operand:<VWIDE> 1 "register_operand" "0")
5677 (ss_ashift:<VWIDE>
5678 (mult:<VWIDE>
5679 (sign_extend:<VWIDE>
5680 (match_operand:VD_HSI 2 "register_operand" "w"))
5681 (vec_duplicate:<VWIDE>
5682 (sign_extend:<VWIDE_S>
5683 (vec_select:<VEL>
5684 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5685 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5686 ))
5687 (const_int 1))))]
5688 "TARGET_SIMD"
5689 {
5690 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5691 return
5692 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5693 }
5694 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5695 )
5696
5697 (define_insn "aarch64_sqdmlal_laneq<mode>"
5698 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5699 (ss_plus:<VWIDE>
5700 (ss_ashift:<VWIDE>
5701 (mult:<VWIDE>
5702 (sign_extend:<VWIDE>
5703 (match_operand:VD_HSI 2 "register_operand" "w"))
5704 (vec_duplicate:<VWIDE>
5705 (sign_extend:<VWIDE_S>
5706 (vec_select:<VEL>
5707 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5708 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5709 ))
5710 (const_int 1))
5711 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5712 "TARGET_SIMD"
5713 {
5714 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5715 return
5716 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5717 }
5718 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5719 )
5720
5721
5722 (define_insn "aarch64_sqdmlal_lane<mode>"
5723 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5724 (ss_plus:<VWIDE>
5725 (ss_ashift:<VWIDE>
5726 (mult:<VWIDE>
5727 (sign_extend:<VWIDE>
5728 (match_operand:SD_HSI 2 "register_operand" "w"))
5729 (sign_extend:<VWIDE>
5730 (vec_select:<VEL>
5731 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5732 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5733 )
5734 (const_int 1))
5735 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5736 "TARGET_SIMD"
5737 {
5738 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5739 return
5740 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5741 }
5742 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5743 )
5744
5745 (define_insn "aarch64_sqdmlsl_lane<mode>"
5746 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5747 (ss_minus:<VWIDE>
5748 (match_operand:<VWIDE> 1 "register_operand" "0")
5749 (ss_ashift:<VWIDE>
5750 (mult:<VWIDE>
5751 (sign_extend:<VWIDE>
5752 (match_operand:SD_HSI 2 "register_operand" "w"))
5753 (sign_extend:<VWIDE>
5754 (vec_select:<VEL>
5755 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5756 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5757 )
5758 (const_int 1))))]
5759 "TARGET_SIMD"
5760 {
5761 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5762 return
5763 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5764 }
5765 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5766 )
5767
5768
5769 (define_insn "aarch64_sqdmlal_laneq<mode>"
5770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5771 (ss_plus:<VWIDE>
5772 (ss_ashift:<VWIDE>
5773 (mult:<VWIDE>
5774 (sign_extend:<VWIDE>
5775 (match_operand:SD_HSI 2 "register_operand" "w"))
5776 (sign_extend:<VWIDE>
5777 (vec_select:<VEL>
5778 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5779 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5780 )
5781 (const_int 1))
5782 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5783 "TARGET_SIMD"
5784 {
5785 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5786 return
5787 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5788 }
5789 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5790 )
5791
5792 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5793 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5794 (ss_minus:<VWIDE>
5795 (match_operand:<VWIDE> 1 "register_operand" "0")
5796 (ss_ashift:<VWIDE>
5797 (mult:<VWIDE>
5798 (sign_extend:<VWIDE>
5799 (match_operand:SD_HSI 2 "register_operand" "w"))
5800 (sign_extend:<VWIDE>
5801 (vec_select:<VEL>
5802 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5803 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5804 )
5805 (const_int 1))))]
5806 "TARGET_SIMD"
5807 {
5808 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5809 return
5810 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5811 }
5812 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5813 )
5814
5815 ;; vqdml[sa]l_n
5816
5817 (define_insn "aarch64_sqdmlsl_n<mode>"
5818 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5819 (ss_minus:<VWIDE>
5820 (match_operand:<VWIDE> 1 "register_operand" "0")
5821 (ss_ashift:<VWIDE>
5822 (mult:<VWIDE>
5823 (sign_extend:<VWIDE>
5824 (match_operand:VD_HSI 2 "register_operand" "w"))
5825 (vec_duplicate:<VWIDE>
5826 (sign_extend:<VWIDE_S>
5827 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5828 (const_int 1))))]
5829 "TARGET_SIMD"
5830 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5831 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5832 )
5833
5834 (define_insn "aarch64_sqdmlal_n<mode>"
5835 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5836 (ss_plus:<VWIDE>
5837 (ss_ashift:<VWIDE>
5838 (mult:<VWIDE>
5839 (sign_extend:<VWIDE>
5840 (match_operand:VD_HSI 2 "register_operand" "w"))
5841 (vec_duplicate:<VWIDE>
5842 (sign_extend:<VWIDE_S>
5843 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5844 (const_int 1))
5845 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5846 "TARGET_SIMD"
5847 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5848 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5849 )
5850
5851
5852 ;; sqdml[as]l2
5853
5854 (define_insn "aarch64_sqdmlal2<mode>_internal"
5855 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5856 (ss_plus:<VWIDE>
5857 (ss_ashift:<VWIDE>
5858 (mult:<VWIDE>
5859 (sign_extend:<VWIDE>
5860 (vec_select:<VHALF>
5861 (match_operand:VQ_HSI 2 "register_operand" "w")
5862 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5863 (sign_extend:<VWIDE>
5864 (vec_select:<VHALF>
5865 (match_operand:VQ_HSI 3 "register_operand" "w")
5866 (match_dup 4))))
5867 (const_int 1))
5868 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5869 "TARGET_SIMD"
5870 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5871 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5872 )
5873
5874 (define_insn "aarch64_sqdmlsl2<mode>_internal"
5875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5876 (ss_minus:<VWIDE>
5877 (match_operand:<VWIDE> 1 "register_operand" "0")
5878 (ss_ashift:<VWIDE>
5879 (mult:<VWIDE>
5880 (sign_extend:<VWIDE>
5881 (vec_select:<VHALF>
5882 (match_operand:VQ_HSI 2 "register_operand" "w")
5883 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5884 (sign_extend:<VWIDE>
5885 (vec_select:<VHALF>
5886 (match_operand:VQ_HSI 3 "register_operand" "w")
5887 (match_dup 4))))
5888 (const_int 1))))]
5889 "TARGET_SIMD"
5890 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5891 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5892 )
5893
5894 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
5895 [(match_operand:<VWIDE> 0 "register_operand")
5896 (SBINQOPS:<VWIDE>
5897 (match_operand:<VWIDE> 1 "register_operand")
5898 (match_dup 1))
5899 (match_operand:VQ_HSI 2 "register_operand")
5900 (match_operand:VQ_HSI 3 "register_operand")]
5901 "TARGET_SIMD"
5902 {
5903 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5904 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
5905 operands[1], operands[2],
5906 operands[3], p));
5907 DONE;
5908 })
5909
5910 ;; vqdml[sa]l2_lane
5911
5912 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
5913 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5914 (ss_minus:<VWIDE>
5915 (match_operand:<VWIDE> 1 "register_operand" "0")
5916 (ss_ashift:<VWIDE>
5917 (mult:<VWIDE>
5918 (sign_extend:<VWIDE>
5919 (vec_select:<VHALF>
5920 (match_operand:VQ_HSI 2 "register_operand" "w")
5921 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5922 (vec_duplicate:<VWIDE>
5923 (sign_extend:<VWIDE_S>
5924 (vec_select:<VEL>
5925 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5926 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5927 ))))
5928 (const_int 1))))]
5929 "TARGET_SIMD"
5930 {
5931 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5932 return
5933 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5934 }
5935 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5936 )
5937
5938 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5940 (ss_plus:<VWIDE>
5941 (ss_ashift:<VWIDE>
5942 (mult:<VWIDE>
5943 (sign_extend:<VWIDE>
5944 (vec_select:<VHALF>
5945 (match_operand:VQ_HSI 2 "register_operand" "w")
5946 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5947 (vec_duplicate:<VWIDE>
5948 (sign_extend:<VWIDE_S>
5949 (vec_select:<VEL>
5950 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5951 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5952 ))))
5953 (const_int 1))
5954 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5955 "TARGET_SIMD"
5956 {
5957 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5958 return
5959 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5960 }
5961 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5962 )
5963
5964 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
5965 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5966 (ss_minus:<VWIDE>
5967 (match_operand:<VWIDE> 1 "register_operand" "0")
5968 (ss_ashift:<VWIDE>
5969 (mult:<VWIDE>
5970 (sign_extend:<VWIDE>
5971 (vec_select:<VHALF>
5972 (match_operand:VQ_HSI 2 "register_operand" "w")
5973 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5974 (vec_duplicate:<VWIDE>
5975 (sign_extend:<VWIDE_S>
5976 (vec_select:<VEL>
5977 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5978 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5979 ))))
5980 (const_int 1))))]
5981 "TARGET_SIMD"
5982 {
5983 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5984 return
5985 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5986 }
5987 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5988 )
5989
5990 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5991 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5992 (ss_plus:<VWIDE>
5993 (ss_ashift:<VWIDE>
5994 (mult:<VWIDE>
5995 (sign_extend:<VWIDE>
5996 (vec_select:<VHALF>
5997 (match_operand:VQ_HSI 2 "register_operand" "w")
5998 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5999 (vec_duplicate:<VWIDE>
6000 (sign_extend:<VWIDE_S>
6001 (vec_select:<VEL>
6002 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6003 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6004 ))))
6005 (const_int 1))
6006 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6007 "TARGET_SIMD"
6008 {
6009 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6010 return
6011 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6012 }
6013 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6014 )
6015
6016 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
6017 [(match_operand:<VWIDE> 0 "register_operand")
6018 (SBINQOPS:<VWIDE>
6019 (match_operand:<VWIDE> 1 "register_operand")
6020 (match_dup 1))
6021 (match_operand:VQ_HSI 2 "register_operand")
6022 (match_operand:<VCOND> 3 "register_operand")
6023 (match_operand:SI 4 "immediate_operand")]
6024 "TARGET_SIMD"
6025 {
6026 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6027 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
6028 operands[1], operands[2],
6029 operands[3], operands[4], p));
6030 DONE;
6031 })
6032
6033 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
6034 [(match_operand:<VWIDE> 0 "register_operand")
6035 (SBINQOPS:<VWIDE>
6036 (match_operand:<VWIDE> 1 "register_operand")
6037 (match_dup 1))
6038 (match_operand:VQ_HSI 2 "register_operand")
6039 (match_operand:<VCONQ> 3 "register_operand")
6040 (match_operand:SI 4 "immediate_operand")]
6041 "TARGET_SIMD"
6042 {
6043 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6044 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
6045 operands[1], operands[2],
6046 operands[3], operands[4], p));
6047 DONE;
6048 })
6049
6050 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
6051 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6052 (ss_minus:<VWIDE>
6053 (match_operand:<VWIDE> 1 "register_operand" "0")
6054 (ss_ashift:<VWIDE>
6055 (mult:<VWIDE>
6056 (sign_extend:<VWIDE>
6057 (vec_select:<VHALF>
6058 (match_operand:VQ_HSI 2 "register_operand" "w")
6059 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6060 (vec_duplicate:<VWIDE>
6061 (sign_extend:<VWIDE_S>
6062 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6063 (const_int 1))))]
6064 "TARGET_SIMD"
6065 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6066 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6067 )
6068
6069 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
6070 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6071 (ss_plus:<VWIDE>
6072 (ss_ashift:<VWIDE>
6073 (mult:<VWIDE>
6074 (sign_extend:<VWIDE>
6075 (vec_select:<VHALF>
6076 (match_operand:VQ_HSI 2 "register_operand" "w")
6077 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6078 (vec_duplicate:<VWIDE>
6079 (sign_extend:<VWIDE_S>
6080 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6081 (const_int 1))
6082 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6083 "TARGET_SIMD"
6084 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6085 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6086 )
6087
6088 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
6089 [(match_operand:<VWIDE> 0 "register_operand")
6090 (SBINQOPS:<VWIDE>
6091 (match_operand:<VWIDE> 1 "register_operand")
6092 (match_dup 1))
6093 (match_operand:VQ_HSI 2 "register_operand")
6094 (match_operand:<VEL> 3 "register_operand")]
6095 "TARGET_SIMD"
6096 {
6097 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6098 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
6099 operands[1], operands[2],
6100 operands[3], p));
6101 DONE;
6102 })
6103
6104 ;; vqdmull
6105
6106 (define_insn "aarch64_sqdmull<mode>"
6107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6108 (ss_ashift:<VWIDE>
6109 (mult:<VWIDE>
6110 (sign_extend:<VWIDE>
6111 (match_operand:VSD_HSI 1 "register_operand" "w"))
6112 (sign_extend:<VWIDE>
6113 (match_operand:VSD_HSI 2 "register_operand" "w")))
6114 (const_int 1)))]
6115 "TARGET_SIMD"
6116 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6117 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
6118 )
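
;; SQDMULL is the multiply-only form of the above; e.g. for 16-bit
;; elements, roughly:
;;   res[i] = ssat32 (2 * (int32_t) a[i] * b[i]);
;; only -32768 * -32768 overflows the widened range, and the result then
;; clamps to 0x7fffffff.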
6119
6120 ;; vqdmull_lane
6121
6122 (define_insn "aarch64_sqdmull_lane<mode>"
6123 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6124 (ss_ashift:<VWIDE>
6125 (mult:<VWIDE>
6126 (sign_extend:<VWIDE>
6127 (match_operand:VD_HSI 1 "register_operand" "w"))
6128 (vec_duplicate:<VWIDE>
6129 (sign_extend:<VWIDE_S>
6130 (vec_select:<VEL>
6131 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6132 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6133 ))
6134 (const_int 1)))]
6135 "TARGET_SIMD"
6136 {
6137 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6138 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6139 }
6140 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6141 )
6142
6143 (define_insn "aarch64_sqdmull_laneq<mode>"
6144 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6145 (ss_ashift:<VWIDE>
6146 (mult:<VWIDE>
6147 (sign_extend:<VWIDE>
6148 (match_operand:VD_HSI 1 "register_operand" "w"))
6149 (vec_duplicate:<VWIDE>
6150 (sign_extend:<VWIDE_S>
6151 (vec_select:<VEL>
6152 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6153 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6154 ))
6155 (const_int 1)))]
6156 "TARGET_SIMD"
6157 {
6158 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6159 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6160 }
6161 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6162 )
6163
6164 (define_insn "aarch64_sqdmull_lane<mode>"
6165 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6166 (ss_ashift:<VWIDE>
6167 (mult:<VWIDE>
6168 (sign_extend:<VWIDE>
6169 (match_operand:SD_HSI 1 "register_operand" "w"))
6170 (sign_extend:<VWIDE>
6171 (vec_select:<VEL>
6172 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6173 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6174 ))
6175 (const_int 1)))]
6176 "TARGET_SIMD"
6177 {
6178 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6179 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6180 }
6181 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6182 )
6183
6184 (define_insn "aarch64_sqdmull_laneq<mode>"
6185 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6186 (ss_ashift:<VWIDE>
6187 (mult:<VWIDE>
6188 (sign_extend:<VWIDE>
6189 (match_operand:SD_HSI 1 "register_operand" "w"))
6190 (sign_extend:<VWIDE>
6191 (vec_select:<VEL>
6192 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6193 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6194 ))
6195 (const_int 1)))]
6196 "TARGET_SIMD"
6197 {
6198 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6199 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6200 }
6201 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6202 )
6203
6204 ;; vqdmull_n
6205
6206 (define_insn "aarch64_sqdmull_n<mode>"
6207 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6208 (ss_ashift:<VWIDE>
6209 (mult:<VWIDE>
6210 (sign_extend:<VWIDE>
6211 (match_operand:VD_HSI 1 "register_operand" "w"))
6212 (vec_duplicate:<VWIDE>
6213 (sign_extend:<VWIDE_S>
6214 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6215 )
6216 (const_int 1)))]
6217 "TARGET_SIMD"
6218 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6219 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6220 )
6221
6222 ;; vqdmull2
6223
6224 (define_insn "aarch64_sqdmull2<mode>_internal"
6225 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6226 (ss_ashift:<VWIDE>
6227 (mult:<VWIDE>
6228 (sign_extend:<VWIDE>
6229 (vec_select:<VHALF>
6230 (match_operand:VQ_HSI 1 "register_operand" "w")
6231 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6232 (sign_extend:<VWIDE>
6233 (vec_select:<VHALF>
6234 (match_operand:VQ_HSI 2 "register_operand" "w")
6235 (match_dup 3)))
6236 )
6237 (const_int 1)))]
6238 "TARGET_SIMD"
6239 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6240 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6241 )
6242
6243 (define_expand "aarch64_sqdmull2<mode>"
6244 [(match_operand:<VWIDE> 0 "register_operand")
6245 (match_operand:VQ_HSI 1 "register_operand")
6246 (match_operand:VQ_HSI 2 "register_operand")]
6247 "TARGET_SIMD"
6248 {
6249 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6250 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6251 operands[2], p));
6252 DONE;
6253 })
6254
6255 ;; vqdmull2_lane
6256
6257 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6258 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6259 (ss_ashift:<VWIDE>
6260 (mult:<VWIDE>
6261 (sign_extend:<VWIDE>
6262 (vec_select:<VHALF>
6263 (match_operand:VQ_HSI 1 "register_operand" "w")
6264 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6265 (vec_duplicate:<VWIDE>
6266 (sign_extend:<VWIDE_S>
6267 (vec_select:<VEL>
6268 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6269 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6270 ))
6271 (const_int 1)))]
6272 "TARGET_SIMD"
6273 {
6274 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6275 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6276 }
6277 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6278 )
6279
6280 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6281 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6282 (ss_ashift:<VWIDE>
6283 (mult:<VWIDE>
6284 (sign_extend:<VWIDE>
6285 (vec_select:<VHALF>
6286 (match_operand:VQ_HSI 1 "register_operand" "w")
6287 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6288 (vec_duplicate:<VWIDE>
6289 (sign_extend:<VWIDE_S>
6290 (vec_select:<VEL>
6291 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6292 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6293 ))
6294 (const_int 1)))]
6295 "TARGET_SIMD"
6296 {
6297 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6298 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6299 }
6300 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6301 )
6302
6303 (define_expand "aarch64_sqdmull2_lane<mode>"
6304 [(match_operand:<VWIDE> 0 "register_operand")
6305 (match_operand:VQ_HSI 1 "register_operand")
6306 (match_operand:<VCOND> 2 "register_operand")
6307 (match_operand:SI 3 "immediate_operand")]
6308 "TARGET_SIMD"
6309 {
6310 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6311 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6312 operands[2], operands[3],
6313 p));
6314 DONE;
6315 })
6316
6317 (define_expand "aarch64_sqdmull2_laneq<mode>"
6318 [(match_operand:<VWIDE> 0 "register_operand")
6319 (match_operand:VQ_HSI 1 "register_operand")
6320 (match_operand:<VCONQ> 2 "register_operand")
6321 (match_operand:SI 3 "immediate_operand")]
6322 "TARGET_SIMD"
6323 {
6324 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6325 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6326 operands[2], operands[3],
6327 p));
6328 DONE;
6329 })
6330
6331 ;; vqdmull2_n
6332
6333 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6334 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6335 (ss_ashift:<VWIDE>
6336 (mult:<VWIDE>
6337 (sign_extend:<VWIDE>
6338 (vec_select:<VHALF>
6339 (match_operand:VQ_HSI 1 "register_operand" "w")
6340 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6341 (vec_duplicate:<VWIDE>
6342 (sign_extend:<VWIDE_S>
6343 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6344 )
6345 (const_int 1)))]
6346 "TARGET_SIMD"
6347 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6348 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6349 )
6350
6351 (define_expand "aarch64_sqdmull2_n<mode>"
6352 [(match_operand:<VWIDE> 0 "register_operand")
6353 (match_operand:VQ_HSI 1 "register_operand")
6354 (match_operand:<VEL> 2 "register_operand")]
6355 "TARGET_SIMD"
6356 {
6357 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6358 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6359 operands[2], p));
6360 DONE;
6361 })
6362
6363 ;; vshl
6364
6365 (define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
6366 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6367 (unspec:VSDQ_I_DI
6368 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6369 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6370 VSHL))]
6371 "TARGET_SIMD"
6372 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6373 [(set_attr "type" "neon_shift_reg<q>")]
6374 )
6375
6376
6377 ;; vqshl
6378
6379 (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
6380 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6381 (unspec:VSDQ_I
6382 [(match_operand:VSDQ_I 1 "register_operand" "w")
6383 (match_operand:VSDQ_I 2 "register_operand" "w")]
6384 VQSHL))]
6385 "TARGET_SIMD"
6386 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6387 [(set_attr "type" "neon_sat_shift_reg<q>")]
6388 )
6389
6390 ;; vshll_n
6391
6392 (define_insn "aarch64_<su>shll<mode>"
6393 [(set (match_operand:<VWIDE> 0 "register_operand")
6394 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6395 (match_operand:VD_BHSI 1 "register_operand"))
6396 (match_operand:<VWIDE> 2
6397 "aarch64_simd_shll_imm_vec")))]
6398 "TARGET_SIMD"
6399 {@ [cons: =0, 1, 2]
6400 [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6401 [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6402 }
6403 [(set_attr "type" "neon_shift_imm_long")]
6404 )
6405
6406 (define_expand "aarch64_<sur>shll_n<mode>"
6407 [(set (match_operand:<VWIDE> 0 "register_operand")
6408 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
6409 (match_operand:SI 2
6410 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6411 VSHLL))]
6412 "TARGET_SIMD"
6413 {
6414 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6415 emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
6416 DONE;
6417 }
6418 )
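
;; A rough example of how the alternatives above are selected: a shift
;; amount equal to the element width (e.g. vshll_n_u8 (x, 8)) satisfies
;; the D2 constraint and is emitted as SHLL, while any smaller immediate
;; satisfies DL and is emitted as USHLL/SSHLL.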
6419
6420 ;; vshll_high_n
6421
6422 (define_insn "aarch64_<su>shll2<mode>"
6423 [(set (match_operand:<VWIDE> 0 "register_operand")
6424 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6425 (vec_select:<VHALF>
6426 (match_operand:VQW 1 "register_operand")
6427 (match_operand:VQW 2 "vect_par_cnst_hi_half")))
6428 (match_operand:<VWIDE> 3
6429 "aarch64_simd_shll_imm_vec")))]
6430 "TARGET_SIMD"
6431 {@ [cons: =0, 1, 2, 3]
6432 [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6433 [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6434 }
6435 [(set_attr "type" "neon_shift_imm_long")]
6436 )
6437
6438 (define_expand "aarch64_<sur>shll2_n<mode>"
6439 [(set (match_operand:<VWIDE> 0 "register_operand")
6440 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
6441 (match_operand:SI 2
6442 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6443 VSHLL))]
6444 "TARGET_SIMD"
6445 {
6446 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6447 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6448 emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
6449 DONE;
6450 }
6451 )
6452
6453 ;; vrshr_n
6454
6455 (define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
6456 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6457 (truncate:VSDQ_I_DI
6458 (SHIFTRT:<V2XWIDE>
6459 (plus:<V2XWIDE>
6460 (<SHIFTEXTEND>:<V2XWIDE>
6461 (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
6462 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6463 (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
6464 "TARGET_SIMD
6465 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6466 "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6467 [(set_attr "type" "neon_sat_shift_imm<q>")]
6468 )
6469
6470 (define_expand "aarch64_<sra_op>rshr_n<mode>"
6471 [(match_operand:VSDQ_I_DI 0 "register_operand")
6472 (SHIFTRT:VSDQ_I_DI
6473 (match_operand:VSDQ_I_DI 1 "register_operand")
6474 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
6475 "TARGET_SIMD"
6476 {
6477 /* Use this expander to create the rounding constant vector, which is
6478 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6479 RTL is generated when handling the DImode expanders. */
6480 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6481 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6482 rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
6483 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6484 if (VECTOR_MODE_P (<MODE>mode))
6485 {
6486 shft = gen_const_vec_duplicate (<MODE>mode, shft);
6487 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6488 }
6489
6490 emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
6491 shft, rnd));
6492 DONE;
6493 }
6494 )
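
;; A worked example of the DImode case: srshr with a shift of 3 builds
;; rnd_wi = 1 << 2 = 4 as a TImode-wide constant, so the insn above
;; computes truncate ((sign_extend (x) + 4) >> 3), i.e. a rounding
;; arithmetic shift right by 3.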
6495
6496 ;; v(r)sra_n
6497
6498 (define_insn "aarch64_<sur>sra_ndi"
6499 [(set (match_operand:DI 0 "register_operand" "=w")
6500 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6501 (match_operand:DI 2 "register_operand" "w")
6502 (match_operand:SI 3
6503 "aarch64_simd_shift_imm_offset_di" "i")]
6504 VSRA))]
6505 "TARGET_SIMD"
6506 "<sur>sra\\t%d0, %d2, %3"
6507 [(set_attr "type" "neon_shift_acc")]
6508 )
6509
6510 ;; vs<lr>i_n
6511
6512 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6513 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6514 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6515 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6516 (match_operand:SI 3
6517 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6518 VSLRI))]
6519 "TARGET_SIMD"
6520 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6521 [(set_attr "type" "neon_shift_imm<q>")]
6522 )
6523
6524 ;; vqshl(u)
6525
6526 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6527 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6528 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6529 (match_operand:SI 2
6530 "aarch64_simd_shift_imm_<ve_mode>" "i")]
6531 VQSHL_N))]
6532 "TARGET_SIMD"
6533 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6534 [(set_attr "type" "neon_sat_shift_imm<q>")]
6535 )
6536
6537
6538 ;; vq(r)shr(u)n_n
6539
6540 (define_insn "aarch64_<shrn_op>shrn_n<mode>"
6541 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6542 (SAT_TRUNC:<VNARROWQ>
6543 (<TRUNC_SHIFT>:SD_HSDI
6544 (match_operand:SD_HSDI 1 "register_operand" "w")
6545 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6546 "TARGET_SIMD"
6547 "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6548 [(set_attr "type" "neon_shift_imm_narrow_q")]
6549 )
6550
6551 (define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
6552 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6553 (ALL_TRUNC:<VNARROWQ>
6554 (SHIFTRT:VQN
6555 (match_operand:VQN 1 "register_operand" "w")
6556 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6557 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6558 "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6559 [(set_attr "type" "neon_shift_imm_narrow_q")]
6560 )
6561
6562 (define_expand "aarch64_<shrn_op>shrn_n<mode>"
6563 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6564 (ALL_TRUNC:<VNARROWQ>
6565 (<TRUNC_SHIFT>:VQN
6566 (match_operand:VQN 1 "register_operand")
6567 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6568 "TARGET_SIMD"
6569 {
6570 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6571 INTVAL (operands[2]));
6572 }
6573 )
6574
6575 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
6576 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6577 (ALL_TRUNC:<VNARROWQ>
6578 (<TRUNC_SHIFT>:<V2XWIDE>
6579 (plus:<V2XWIDE>
6580 (<TRUNCEXTEND>:<V2XWIDE>
6581 (match_operand:VQN 1 "register_operand" "w"))
6582 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6583 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6584 "TARGET_SIMD
6585 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6586 "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6587 [(set_attr "type" "neon_shift_imm_narrow_q")]
6588 )
6589
6590 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
6591 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6592 (SAT_TRUNC:<VNARROWQ>
6593 (<TRUNC_SHIFT>:<DWI>
6594 (plus:<DWI>
6595 (<TRUNCEXTEND>:<DWI>
6596 (match_operand:SD_HSDI 1 "register_operand" "w"))
6597 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6598 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6599 "TARGET_SIMD
6600 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6601 "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6602 [(set_attr "type" "neon_shift_imm_narrow_q")]
6603 )
6604
6605 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6606 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6607 (SAT_TRUNC:<VNARROWQ>
6608 (<TRUNC_SHIFT>:<V2XWIDE>
6609 (plus:<V2XWIDE>
6610 (<TRUNCEXTEND>:<V2XWIDE>
6611 (match_operand:SD_HSDI 1 "register_operand"))
6612 (match_dup 3))
6613 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6614 "TARGET_SIMD"
6615 {
6616 /* Use this expander to create the rounding constant vector, which is
6617 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6618 RTL is generated when handling the DImode expanders. */
6619 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6620 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6621 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6622 }
6623 )
6624
6625 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6626 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6627 (ALL_TRUNC:<VNARROWQ>
6628 (<TRUNC_SHIFT>:<V2XWIDE>
6629 (plus:<V2XWIDE>
6630 (<TRUNCEXTEND>:<V2XWIDE>
6631 (match_operand:VQN 1 "register_operand"))
6632 (match_dup 3))
6633 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6634 "TARGET_SIMD"
6635 {
6636 if (<CODE> == TRUNCATE
6637 && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6638 {
6639 rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
6640 emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
6641 DONE;
6642 }
6643 /* Use this expander to create the rounding constant vector, which is
6644 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6645 RTL is generated when handling the DImode expanders. */
6646 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6647 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6648 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6649 operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6650 operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6651 }
6652 )
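
;; The RADDHN shortcut above relies on the identity
;;   (x + (1 << (n - 1))) >> n == raddhn (x, 0)
;; when n equals the narrow element width: e.g. for V8HI -> V8QI with a
;; shift of 8, rshrn computes (x + 128) >> 8, which is what RADDHN
;; produces when its second operand is zero.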
6653
6654 (define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
6655 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6656 (truncate:<VNARROWQ>
6657 (smin:VQN
6658 (smax:VQN
6659 (ashiftrt:VQN
6660 (match_operand:VQN 1 "register_operand" "w")
6661 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6662 (match_operand:VQN 3 "aarch64_simd_imm_zero"))
6663 (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
6664 "TARGET_SIMD"
6665 "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6666 [(set_attr "type" "neon_shift_imm_narrow_q")]
6667 )
6668
6669 (define_insn "aarch64_sqshrun_n<mode>_insn"
6670 [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
6671 (smin:SD_HSDI
6672 (smax:SD_HSDI
6673 (ashiftrt:SD_HSDI
6674 (match_operand:SD_HSDI 1 "register_operand" "w")
6675 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6676 (const_int 0))
6677 (const_int <half_mask>)))]
6678 "TARGET_SIMD"
6679 "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6680 [(set_attr "type" "neon_shift_imm_narrow_q")]
6681 )
6682
6683 (define_expand "aarch64_sqshrun_n<mode>"
6684 [(match_operand:<VNARROWQ> 0 "register_operand")
6685 (match_operand:SD_HSDI 1 "register_operand")
6686 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6687 "TARGET_SIMD"
6688 {
6689 rtx dst = gen_reg_rtx (<MODE>mode);
6690 emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
6691 operands[2]));
6692 emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6693 DONE;
6694 }
6695 )
6696
6697 (define_expand "aarch64_sqshrun_n<mode>"
6698 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6699 (truncate:<VNARROWQ>
6700 (smin:VQN
6701 (smax:VQN
6702 (ashiftrt:VQN
6703 (match_operand:VQN 1 "register_operand")
6704 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6705 (match_dup 3))
6706 (match_dup 4))))]
6707 "TARGET_SIMD"
6708 {
6709 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6710 INTVAL (operands[2]));
6711 operands[3] = CONST0_RTX (<MODE>mode);
6712 operands[4]
6713 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6714 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6715 }
6716 )
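
;; Informally, for V8HI -> V8QI with a shift of 2 the expander above
;; instantiates operands 3 and 4 so that each lane computes
;;   clamp (x >> 2, 0, 0xff)
;; which is the unsigned-saturating narrowing that SQSHRUN performs.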
6717
6718 (define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
6719 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6720 (truncate:<VNARROWQ>
6721 (smin:<V2XWIDE>
6722 (smax:<V2XWIDE>
6723 (ashiftrt:<V2XWIDE>
6724 (plus:<V2XWIDE>
6725 (sign_extend:<V2XWIDE>
6726 (match_operand:VQN 1 "register_operand" "w"))
6727 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6728 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6729 (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
6730 (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
6731 "TARGET_SIMD
6732 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6733 "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6734 [(set_attr "type" "neon_shift_imm_narrow_q")]
6735 )
6736
6737 (define_insn "aarch64_sqrshrun_n<mode>_insn"
6738 [(set (match_operand:<DWI> 0 "register_operand" "=w")
6739 (smin:<DWI>
6740 (smax:<DWI>
6741 (ashiftrt:<DWI>
6742 (plus:<DWI>
6743 (sign_extend:<DWI>
6744 (match_operand:SD_HSDI 1 "register_operand" "w"))
6745 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6746 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6747 (const_int 0))
6748 (const_int <half_mask>)))]
6749 "TARGET_SIMD
6750 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6751 "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6752 [(set_attr "type" "neon_shift_imm_narrow_q")]
6753 )
6754
6755 (define_expand "aarch64_sqrshrun_n<mode>"
6756 [(match_operand:<VNARROWQ> 0 "register_operand")
6757 (match_operand:SD_HSDI 1 "register_operand")
6758 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6759 "TARGET_SIMD"
6760 {
6761 int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
6762 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6763 rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
6764 rtx dst = gen_reg_rtx (<DWI>mode);
6765 emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
6766 emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6767 DONE;
6768 }
6769 )
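
;; A sketch of the scalar flow above for HImode with a shift of 4: the
;; value clamp ((sign_extend (x) + 8) >> 4, 0, 0xff) is computed in
;; SImode (<DWI>), and the QImode lowpart of that result is the SQRSHRUN
;; output.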
6770
6771 (define_expand "aarch64_sqrshrun_n<mode>"
6772 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6773 (truncate:<VNARROWQ>
6774 (smin:<V2XWIDE>
6775 (smax:<V2XWIDE>
6776 (ashiftrt:<V2XWIDE>
6777 (plus:<V2XWIDE>
6778 (sign_extend:<V2XWIDE>
6779 (match_operand:VQN 1 "register_operand"))
6780 (match_dup 3))
6781 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6782 (match_dup 4))
6783 (match_dup 5))))]
6784 "TARGET_SIMD"
6785 {
6786 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6787 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6788 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6789 operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6790 operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6791 operands[4] = CONST0_RTX (<V2XWIDE>mode);
6792 operands[5]
6793 = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
6794 operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
6795 }
6796 )
6797
6798 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
6799 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6800 (vec_concat:<VNARROWQ2>
6801 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6802 (ALL_TRUNC:<VNARROWQ>
6803 (SHIFTRT:VQN
6804 (match_operand:VQN 2 "register_operand" "w")
6805 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6806 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6807 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6808 "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6809 [(set_attr "type" "neon_shift_imm_narrow_q")]
6810 )
6811
6812 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
6813 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6814 (vec_concat:<VNARROWQ2>
6815 (ALL_TRUNC:<VNARROWQ>
6816 (SHIFTRT:VQN
6817 (match_operand:VQN 2 "register_operand" "w")
6818 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6819 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6820 "TARGET_SIMD && BYTES_BIG_ENDIAN
6821 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6822 "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6823 [(set_attr "type" "neon_shift_imm_narrow_q")]
6824 )
6825
6826 (define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
6827 [(match_operand:<VNARROWQ2> 0 "register_operand")
6828 (match_operand:<VNARROWQ> 1 "register_operand")
6829 (ALL_TRUNC:<VNARROWQ>
6830 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
6831 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6832 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6833 {
6834 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6835 INTVAL (operands[3]));
6836
6837 if (BYTES_BIG_ENDIAN)
6838 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
6839 operands[0], operands[1], operands[2], operands[3]));
6840 else
6841 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
6842 operands[0], operands[1], operands[2], operands[3]));
6843 DONE;
6844 }
6845 )
6846
6847 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
6848 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6849 (vec_concat:<VNARROWQ2>
6850 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6851 (ALL_TRUNC:<VNARROWQ>
6852 (<TRUNC_SHIFT>:<V2XWIDE>
6853 (plus:<V2XWIDE>
6854 (<TRUNCEXTEND>:<V2XWIDE>
6855 (match_operand:VQN 2 "register_operand" "w"))
6856 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6857 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6858 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6859 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6860 "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6861 [(set_attr "type" "neon_shift_imm_narrow_q")]
6862 )
6863
6864 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
6865 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6866 (vec_concat:<VNARROWQ2>
6867 (ALL_TRUNC:<VNARROWQ>
6868 (<TRUNC_SHIFT>:<V2XWIDE>
6869 (plus:<V2XWIDE>
6870 (<TRUNCEXTEND>:<V2XWIDE>
6871 (match_operand:VQN 2 "register_operand" "w"))
6872 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6873 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6874 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6875 "TARGET_SIMD && BYTES_BIG_ENDIAN
6876 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6877 "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6878 [(set_attr "type" "neon_shift_imm_narrow_q")]
6879 )
6880
6881 (define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
6882 [(match_operand:<VNARROWQ2> 0 "register_operand")
6883 (match_operand:<VNARROWQ> 1 "register_operand")
6884 (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
6885 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6886 "TARGET_SIMD"
6887 {
6888 if (<CODE> == TRUNCATE
6889 && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6890 {
6891 rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
6892 emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
6893 operands[2], tmp));
6894 DONE;
6895 }
6896 /* Use this expander to create the rounding constant vector, which is
6897 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6898 RTL is generated when handling the DImode expanders. */
6899 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6900 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
6901 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6902 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6903 operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
6904 if (BYTES_BIG_ENDIAN)
6905 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
6906 operands[1],
6907 operands[2],
6908 operands[3],
6909 rnd));
6910 else
6911 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
6912 operands[1],
6913 operands[2],
6914 operands[3],
6915 rnd));
6916 DONE;
6917 }
6918 )
6919
6920 (define_insn "aarch64_sqshrun2_n<mode>_insn_le"
6921 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6922 (vec_concat:<VNARROWQ2>
6923 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6924 (truncate:<VNARROWQ>
6925 (smin:VQN
6926 (smax:VQN
6927 (ashiftrt:VQN
6928 (match_operand:VQN 2 "register_operand" "w")
6929 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6930 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
6931 (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
6932 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6933 "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6934 [(set_attr "type" "neon_shift_imm_narrow_q")]
6935 )
6936
6937 (define_insn "aarch64_sqshrun2_n<mode>_insn_be"
6938 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6939 (vec_concat:<VNARROWQ2>
6940 (truncate:<VNARROWQ>
6941 (smin:VQN
6942 (smax:VQN
6943 (ashiftrt:VQN
6944 (match_operand:VQN 2 "register_operand" "w")
6945 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6946 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
6947 (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
6948 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6949 "TARGET_SIMD && BYTES_BIG_ENDIAN"
6950 "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6951 [(set_attr "type" "neon_shift_imm_narrow_q")]
6952 )
6953
6954 (define_expand "aarch64_sqshrun2_n<mode>"
6955 [(match_operand:<VNARROWQ2> 0 "register_operand")
6956 (match_operand:<VNARROWQ> 1 "register_operand")
6957 (match_operand:VQN 2 "register_operand")
6958 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6959 "TARGET_SIMD"
6960 {
6961 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6962 INTVAL (operands[3]));
6963 rtx zeros = CONST0_RTX (<MODE>mode);
6964 rtx half_umax
6965 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6966 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6967 if (BYTES_BIG_ENDIAN)
6968 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
6969 operands[1], operands[2], operands[3],
6970 zeros, half_umax));
6971 else
6972 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
6973 operands[1], operands[2], operands[3],
6974 zeros, half_umax));
6975 DONE;
6976 }
6977 )
6978
6979 (define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
6980 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6981 (vec_concat:<VNARROWQ2>
6982 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6983 (truncate:<VNARROWQ>
6984 (smin:<V2XWIDE>
6985 (smax:<V2XWIDE>
6986 (ashiftrt:<V2XWIDE>
6987 (plus:<V2XWIDE>
6988 (sign_extend:<V2XWIDE>
6989 (match_operand:VQN 2 "register_operand" "w"))
6990 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6991 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6992 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
6993 (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
6994 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6995 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6996 "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6997 [(set_attr "type" "neon_shift_imm_narrow_q")]
6998 )
6999
7000 (define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
7001 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7002 (vec_concat:<VNARROWQ2>
7003 (truncate:<VNARROWQ>
7004 (smin:<V2XWIDE>
7005 (smax:<V2XWIDE>
7006 (ashiftrt:<V2XWIDE>
7007 (plus:<V2XWIDE>
7008 (sign_extend:<V2XWIDE>
7009 (match_operand:VQN 2 "register_operand" "w"))
7010 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7011 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7012 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7013 (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
7014 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7015 "TARGET_SIMD && BYTES_BIG_ENDIAN
7016 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7017 "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7018 [(set_attr "type" "neon_shift_imm_narrow_q")]
7019 )
7020
7021 (define_expand "aarch64_sqrshrun2_n<mode>"
7022 [(match_operand:<VNARROWQ2> 0 "register_operand")
7023 (match_operand:<VNARROWQ> 1 "register_operand")
7024 (match_operand:VQN 2 "register_operand")
7025 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7026 "TARGET_SIMD"
7027 {
7028 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7029 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7030 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7031 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7032 rtx zero = CONST0_RTX (<V2XWIDE>mode);
7033 rtx half_umax
7034 = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
7035 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7036 operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7037 if (BYTES_BIG_ENDIAN)
7038 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
7039 operands[1], operands[2], operands[3], rnd,
7040 zero, half_umax));
7041 else
7042 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
7043 operands[1], operands[2], operands[3], rnd,
7044 zero, half_umax));
7045 DONE;
7046 }
7047 )
7048
7049 ;; cm(eq|ge|gt|lt|le)
7050 ;; Note, we have constraints for Dz and Z as different expanders
7051 ;; have different ideas of what should be passed to this pattern.
7052
7053 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7054 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
7055 (neg:<V_INT_EQUIV>
7056 (COMPARISONS:<V_INT_EQUIV>
7057 (match_operand:VDQ_I 1 "register_operand" "w,w")
7058 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
7059 )))]
7060 "TARGET_SIMD"
7061 "@
7062 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7063 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
7064 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
7065 )
7066
7067 (define_insn_and_split "aarch64_cm<optab>di"
7068 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
7069 (neg:DI
7070 (COMPARISONS:DI
7071 (match_operand:DI 1 "register_operand" "w,w,r")
7072 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
7073 )))
7074 (clobber (reg:CC CC_REGNUM))]
7075 "TARGET_SIMD"
7076 "#"
7077 "&& reload_completed"
7078 [(set (match_operand:DI 0 "register_operand")
7079 (neg:DI
7080 (COMPARISONS:DI
7081 (match_operand:DI 1 "register_operand")
7082 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7083 )))]
7084 {
7085 /* If we are in the general purpose register file,
7086 we split to a sequence of comparison and store. */
7087 if (GP_REGNUM_P (REGNO (operands[0]))
7088 && GP_REGNUM_P (REGNO (operands[1])))
7089 {
7090 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
7091 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7092 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7093 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7094 DONE;
7095 }
7096 /* Otherwise, we expand to a similar pattern which does not
7097 clobber CC_REGNUM. */
7098 }
7099 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
7100 )
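
;; For the general-register alternative the split above emits, roughly,
;; a compare followed by a CSETM via gen_cstoredi_neg; the vector
;; alternatives are handled by the post-reload pattern below.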
7101
7102 (define_insn "*aarch64_cm<optab>di"
7103 [(set (match_operand:DI 0 "register_operand" "=w,w")
7104 (neg:DI
7105 (COMPARISONS:DI
7106 (match_operand:DI 1 "register_operand" "w,w")
7107 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
7108 )))]
7109 "TARGET_SIMD && reload_completed"
7110 "@
7111 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
7112 cm<optab>\t%d0, %d1, #0"
7113 [(set_attr "type" "neon_compare, neon_compare_zero")]
7114 )
7115
7116 ;; cm(hs|hi)
7117
7118 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7119 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7120 (neg:<V_INT_EQUIV>
7121 (UCOMPARISONS:<V_INT_EQUIV>
7122 (match_operand:VDQ_I 1 "register_operand" "w")
7123 (match_operand:VDQ_I 2 "register_operand" "w")
7124 )))]
7125 "TARGET_SIMD"
7126 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7127 [(set_attr "type" "neon_compare<q>")]
7128 )
7129
7130 (define_insn_and_split "aarch64_cm<optab>di"
7131 [(set (match_operand:DI 0 "register_operand" "=w,r")
7132 (neg:DI
7133 (UCOMPARISONS:DI
7134 (match_operand:DI 1 "register_operand" "w,r")
7135 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
7136 )))
7137 (clobber (reg:CC CC_REGNUM))]
7138 "TARGET_SIMD"
7139 "#"
7140 "&& reload_completed"
7141 [(set (match_operand:DI 0 "register_operand")
7142 (neg:DI
7143 (UCOMPARISONS:DI
7144 (match_operand:DI 1 "register_operand")
7145 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7146 )))]
7147 {
7148 /* If we are in the general purpose register file,
7149 we split to a sequence of comparison and store. */
7150 if (GP_REGNUM_P (REGNO (operands[0]))
7151 && GP_REGNUM_P (REGNO (operands[1])))
7152 {
7153 machine_mode mode = CCmode;
7154 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7155 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7156 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7157 DONE;
7158 }
7159 /* Otherwise, we expand to a similar pattern which does not
7160 clobber CC_REGNUM. */
7161 }
7162 [(set_attr "type" "neon_compare,multiple")]
7163 )
7164
7165 (define_insn "*aarch64_cm<optab>di"
7166 [(set (match_operand:DI 0 "register_operand" "=w")
7167 (neg:DI
7168 (UCOMPARISONS:DI
7169 (match_operand:DI 1 "register_operand" "w")
7170 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
7171 )))]
7172 "TARGET_SIMD && reload_completed"
7173 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
7174 [(set_attr "type" "neon_compare")]
7175 )
7176
7177 ;; cmtst
7178
7179 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
7180 ;; we don't have any insns using ne, and aarch64_vcond outputs
7181 ;; not (neg (eq (and x y) 0))
7182 ;; which is rewritten by simplify_rtx as
7183 ;; plus (eq (and x y) 0) -1.
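;; For instance, a vector condition computing
;;   r[i] = (a[i] & b[i]) != 0 ? -1 : 0;
;; reaches combine in this plus/eq shape and is matched as a single
;; CMTST.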
7184
7185 (define_insn "aarch64_cmtst<mode><vczle><vczbe>"
7186 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7187 (plus:<V_INT_EQUIV>
7188 (eq:<V_INT_EQUIV>
7189 (and:VDQ_I
7190 (match_operand:VDQ_I 1 "register_operand" "w")
7191 (match_operand:VDQ_I 2 "register_operand" "w"))
7192 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
7193 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
7194 ]
7195 "TARGET_SIMD"
7196 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7197 [(set_attr "type" "neon_tst<q>")]
7198 )
7199
7200 ;; One can also get a cmtst by combining a
7201 ;; not (neg (eq x 0)), in which case we rewrite it to
7202 ;; a comparison of the operand against itself.
7203
7204 (define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
7205 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7206 (plus:<V_INT_EQUIV>
7207 (eq:<V_INT_EQUIV>
7208 (match_operand:VDQ_I 1 "register_operand" "w")
7209 (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
7210 (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
7211 ]
7212 "TARGET_SIMD"
7213 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
7214 [(set_attr "type" "neon_tst<q>")]
7215 )
7216
7217 (define_insn_and_split "aarch64_cmtstdi"
7218 [(set (match_operand:DI 0 "register_operand" "=w,r")
7219 (neg:DI
7220 (ne:DI
7221 (and:DI
7222 (match_operand:DI 1 "register_operand" "w,r")
7223 (match_operand:DI 2 "register_operand" "w,r"))
7224 (const_int 0))))
7225 (clobber (reg:CC CC_REGNUM))]
7226 "TARGET_SIMD"
7227 "#"
7228 "&& reload_completed"
7229 [(set (match_operand:DI 0 "register_operand")
7230 (neg:DI
7231 (ne:DI
7232 (and:DI
7233 (match_operand:DI 1 "register_operand")
7234 (match_operand:DI 2 "register_operand"))
7235 (const_int 0))))]
7236 {
7237 /* If we are in the general purpose register file,
7238 we split to a sequence of comparison and store. */
7239 if (GP_REGNUM_P (REGNO (operands[0]))
7240 && GP_REGNUM_P (REGNO (operands[1])))
7241 {
7242 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
7243 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
7244 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
7245 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
7246 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7247 DONE;
7248 }
7249 /* Otherwise, we expand to a similar pattern which does not
7250 clobber CC_REGNUM. */
7251 }
7252 [(set_attr "type" "neon_tst,multiple")]
7253 )
7254
7255 (define_insn "*aarch64_cmtstdi<vczle><vczbe>"
7256 [(set (match_operand:DI 0 "register_operand" "=w")
7257 (neg:DI
7258 (ne:DI
7259 (and:DI
7260 (match_operand:DI 1 "register_operand" "w")
7261 (match_operand:DI 2 "register_operand" "w"))
7262 (const_int 0))))]
7263 "TARGET_SIMD"
7264 "cmtst\t%d0, %d1, %d2"
7265 [(set_attr "type" "neon_tst")]
7266 )
7267
7268 ;; fcm(eq|ge|gt|le|lt)
7269
7270 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7271 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
7272 (neg:<V_INT_EQUIV>
7273 (COMPARISONS:<V_INT_EQUIV>
7274 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
7275 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
7276 )))]
7277 "TARGET_SIMD"
7278 "@
7279 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7280 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
7281 [(set_attr "type" "neon_fp_compare_<stype><q>")]
7282 )
7283
7284 ;; fac(ge|gt)
7285 ;; Note we can also handle what would be fac(le|lt) by
7286 ;; generating fac(ge|gt).
7287
7288 (define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
7289 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7290 (neg:<V_INT_EQUIV>
7291 (FAC_COMPARISONS:<V_INT_EQUIV>
7292 (abs:VHSDF_HSDF
7293 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
7294 (abs:VHSDF_HSDF
7295 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
7296 )))]
7297 "TARGET_SIMD"
7298 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7299 [(set_attr "type" "neon_fp_compare_<stype><q>")]
7300 )
7301
7302 ;; addp
7303
7304 ;; ADDP with two registers semantically concatenates them and performs
7305 ;; a pairwise addition on the result. For 128-bit input modes represent this
7306 ;; as a concatenation of the pairwise addition results of the two input
7307 ;; registers. This allows us to avoid using intermediate 256-bit modes.
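;; A concrete sketch for V4SImode: with even selector (parallel [0 2])
;; and odd selector (parallel [1 3]) the pattern computes { a0+a1, a2+a3 }
;; from operand 1 and { b0+b1, b2+b3 } from operand 2 and concatenates
;; them, which is the ADDP result.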
7308 (define_insn "aarch64_addp<mode>_insn"
7309 [(set (match_operand:VQ_I 0 "register_operand" "=w")
7310 (vec_concat:VQ_I
7311 (plus:<VHALF>
7312 (vec_select:<VHALF>
7313 (match_operand:VQ_I 1 "register_operand" "w")
7314 (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
7315 (vec_select:<VHALF>
7316 (match_dup 1)
7317 (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
7318 (plus:<VHALF>
7319 (vec_select:<VHALF>
7320 (match_operand:VQ_I 2 "register_operand" "w")
7321 (match_dup 3))
7322 (vec_select:<VHALF>
7323 (match_dup 2)
7324 (match_dup 4)))))]
7325 "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7326 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7327 [(set_attr "type" "neon_reduc_add<q>")]
7328 )
7329
7330 ;; For 64-bit input modes an ADDP is represented as a concatenation
7331 ;; of the input registers into a 128-bit register which is then fed
7332 ;; into a pairwise add. That way we avoid having to create intermediate
7333 ;; 32-bit vector modes.
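;; E.g. for V2SImode inputs a and b the concatenation is
;; { a0, a1, b0, b1 }; adding the even selection { a0, b0 } to the odd
;; selection { a1, b1 } yields { a0+a1, b0+b1 }, the 64-bit ADDP result.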
7334 (define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
7335 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
7336 (plus:VD_BHSI
7337 (vec_select:VD_BHSI
7338 (vec_concat:<VDBL>
7339 (match_operand:VD_BHSI 1 "register_operand" "w")
7340 (match_operand:VD_BHSI 2 "register_operand" "w"))
7341 (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
7342 (vec_select:VD_BHSI
7343 (vec_concat:<VDBL>
7344 (match_dup 1)
7345 (match_dup 2))
7346 (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
7347 "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7348 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7349 [(set_attr "type" "neon_reduc_add<q>")]
7350 )
7351
7352 ;; A common use case of 64-bit ADDP is to have both operands come from the same
7353 ;; 128-bit vector and produce the pairwise addition results in the lower half.
7354 ;; Split into the 128-bit ADDP form and extract the low half.
7355 (define_insn_and_split "*aarch64_addp_same_reg<mode>"
7356 [(set (match_operand:<VHALF> 0 "register_operand" "=w")
7357 (plus:<VHALF>
7358 (vec_select:<VHALF>
7359 (match_operand:VQ_I 1 "register_operand" "w")
7360 (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
7361 (vec_select:<VHALF>
7362 (match_dup 1)
7363 (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
7364 "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
7365 "#"
7366 "&& 1"
7367 [(const_int 0)]
7368 {
7369 rtx scratch;
7370 if (can_create_pseudo_p ())
7371 scratch = gen_reg_rtx (<MODE>mode);
7372 else
7373 scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);
7374
7375 emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
7376 operands[2], operands[3]));
7377 emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
7378 DONE;
7379 }
7380 )
7381
7382 (define_expand "aarch64_addp<mode>"
7383 [(match_operand:VDQ_I 0 "register_operand")
7384 (match_operand:VDQ_I 1 "register_operand")
7385 (match_operand:VDQ_I 2 "register_operand")]
7386 "TARGET_SIMD"
7387 {
7388 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
7389 if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
7390 nunits /= 2;
7391 rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
7392 rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
7393 if (BYTES_BIG_ENDIAN)
7394 std::swap (operands[1], operands[2]);
7395 emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
7396 operands[2], par_even, par_odd));
7397 DONE;
7398 }
7399 )
7400
7401 ;; sqrt
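;; aarch64_emit_approx_sqrt emits an approximate sequence (based on
;; FRSQRTE/FRSQRTS iteration) only when the tuning and fast-math-style
;; flags allow it; otherwise it returns false and the expander below
;; falls through to the FSQRT pattern.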
7402
7403 (define_expand "sqrt<mode>2"
7404 [(set (match_operand:VHSDF 0 "register_operand")
7405 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
7406 "TARGET_SIMD"
7407 {
7408 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
7409 DONE;
7410 })
7411
7412 (define_insn "*sqrt<mode>2<vczle><vczbe>"
7413 [(set (match_operand:VHSDF 0 "register_operand" "=w")
7414 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
7415 "TARGET_SIMD"
7416 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
7417 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
7418 )
7419
7420 ;; Patterns for vector struct loads and stores.
7421
7422 (define_insn "aarch64_simd_ld2<vstruct_elt>"
7423 [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
7424 (unspec:VSTRUCT_2Q [
7425 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
7426 UNSPEC_LD2))]
7427 "TARGET_SIMD"
7428 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7429 [(set_attr "type" "neon_load2_2reg<q>")]
7430 )
7431
7432 (define_insn "aarch64_simd_ld2r<vstruct_elt>"
7433 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7434 (unspec:VSTRUCT_2QD [
7435 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7436 UNSPEC_LD2_DUP))]
7437 "TARGET_SIMD"
7438 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7439 [(set_attr "type" "neon_load2_all_lanes<q>")]
7440 )
7441
7442 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7443 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7444 (unspec:VSTRUCT_2QD [
7445 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7446 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7447 (match_operand:SI 3 "immediate_operand" "i")]
7448 UNSPEC_LD2_LANE))]
7449 "TARGET_SIMD"
7450 {
7451 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7452 INTVAL (operands[3]));
7453 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7454 }
7455 [(set_attr "type" "neon_load2_one_lane")]
7456 )
7457
7458 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7459 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7460 (unspec:VSTRUCT_2Q [
7461 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7462 UNSPEC_LD2))]
7463 "TARGET_SIMD"
7464 {
7465 if (BYTES_BIG_ENDIAN)
7466 {
7467 rtx tmp = gen_reg_rtx (<MODE>mode);
7468 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7469 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7470 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7471 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7472 }
7473 else
7474 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7475 DONE;
7476 })
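
;; On big-endian targets the raw LD2 result is in architectural lane
;; order, so the expander above permutes each register back to GCC lane
;; order with aarch64_rev_reglist; the little-endian path needs no fixup.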
7477
7478 (define_insn "aarch64_simd_st2<vstruct_elt>"
7479 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7480 (unspec:VSTRUCT_2Q [
7481 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7482 UNSPEC_ST2))]
7483 "TARGET_SIMD"
7484 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7485 [(set_attr "type" "neon_store2_2reg<q>")]
7486 )
7487
7488 ;; RTL uses GCC vector extension indices, so flip only for assembly.
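;; For example, on a big-endian V4SI target GCC lane 0 is architectural
;; lane 3: aarch64_endian_lane_rtx applies that nunits - 1 - lane
;; remapping before the index is printed.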
7489 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7490 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7491 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7492 (match_operand:SI 2 "immediate_operand" "i")]
7493 UNSPEC_ST2_LANE))]
7494 "TARGET_SIMD"
7495 {
7496 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7497 INTVAL (operands[2]));
7498 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7499 }
7500 [(set_attr "type" "neon_store2_one_lane<q>")]
7501 )
7502
7503 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7504 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7505 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7506 UNSPEC_ST2))]
7507 "TARGET_SIMD"
7508 {
7509 if (BYTES_BIG_ENDIAN)
7510 {
7511 rtx tmp = gen_reg_rtx (<MODE>mode);
7512 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7513 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7514 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7515 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7516 }
7517 else
7518 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7519 DONE;
7520 })
7521
7522 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7523 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7524 (unspec:VSTRUCT_3Q [
7525 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7526 UNSPEC_LD3))]
7527 "TARGET_SIMD"
7528 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7529 [(set_attr "type" "neon_load3_3reg<q>")]
7530 )
7531
7532 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
7533 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7534 (unspec:VSTRUCT_3QD [
7535 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7536 UNSPEC_LD3_DUP))]
7537 "TARGET_SIMD"
7538 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7539 [(set_attr "type" "neon_load3_all_lanes<q>")]
7540 )
7541
7542 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7543 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7544 (unspec:VSTRUCT_3QD [
7545 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7546 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7547 (match_operand:SI 3 "immediate_operand" "i")]
7548 UNSPEC_LD3_LANE))]
7549 "TARGET_SIMD"
7550 {
7551 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7552 INTVAL (operands[3]));
7553 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7554 }
7555 [(set_attr "type" "neon_load3_one_lane")]
7556 )
7557
7558 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7559 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7560 (unspec:VSTRUCT_3Q [
7561 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7562 UNSPEC_LD3))]
7563 "TARGET_SIMD"
7564 {
7565 if (BYTES_BIG_ENDIAN)
7566 {
7567 rtx tmp = gen_reg_rtx (<MODE>mode);
7568 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7569 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7570 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7571 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7572 }
7573 else
7574 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7575 DONE;
7576 })
7577
7578 (define_insn "aarch64_simd_st3<vstruct_elt>"
7579 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
7580 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
7581 UNSPEC_ST3))]
7582 "TARGET_SIMD"
7583 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7584 [(set_attr "type" "neon_store3_3reg<q>")]
7585 )
7586
7587 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7588 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7589 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7590 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
7591 (match_operand:SI 2 "immediate_operand" "i")]
7592 UNSPEC_ST3_LANE))]
7593 "TARGET_SIMD"
7594 {
7595 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7596 INTVAL (operands[2]));
7597 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
7598 }
7599 [(set_attr "type" "neon_store3_one_lane<q>")]
7600 )
7601
7602 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7603 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
7604 (unspec:VSTRUCT_3Q [
7605 (match_operand:VSTRUCT_3Q 1 "register_operand")]
7606 UNSPEC_ST3))]
7607 "TARGET_SIMD"
7608 {
7609 if (BYTES_BIG_ENDIAN)
7610 {
7611 rtx tmp = gen_reg_rtx (<MODE>mode);
7612 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7613 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7614 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7615 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
7616 }
7617 else
7618 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
7619 DONE;
7620 })
7621
7622 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7623 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
7624 (unspec:VSTRUCT_4Q [
7625 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
7626 UNSPEC_LD4))]
7627 "TARGET_SIMD"
7628 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7629 [(set_attr "type" "neon_load4_4reg<q>")]
7630 )
7631
7632 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
7633 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7634 (unspec:VSTRUCT_4QD [
7635 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7636 UNSPEC_LD4_DUP))]
7637 "TARGET_SIMD"
7638 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7639 [(set_attr "type" "neon_load4_all_lanes<q>")]
7640 )
7641
7642 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7643 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7644 (unspec:VSTRUCT_4QD [
7645 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7646 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7647 (match_operand:SI 3 "immediate_operand" "i")]
7648 UNSPEC_LD4_LANE))]
7649 "TARGET_SIMD"
7650 {
7651 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7652 INTVAL (operands[3]));
7653 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7654 }
7655 [(set_attr "type" "neon_load4_one_lane")]
7656 )
7657
7658 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7659 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7660 (unspec:VSTRUCT_4Q [
7661 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7662 UNSPEC_LD4))]
7663 "TARGET_SIMD"
7664 {
7665 if (BYTES_BIG_ENDIAN)
7666 {
7667 rtx tmp = gen_reg_rtx (<MODE>mode);
7668 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7669 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7670 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7671 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7672 }
7673 else
7674 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7675 DONE;
7676 })
7677
7678 (define_insn "aarch64_simd_st4<vstruct_elt>"
7679 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7680 (unspec:VSTRUCT_4Q [
7681 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7682 UNSPEC_ST4))]
7683 "TARGET_SIMD"
7684 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7685 [(set_attr "type" "neon_store4_4reg<q>")]
7686 )
7687
7688 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7689 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7690 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7691 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7692 (match_operand:SI 2 "immediate_operand" "i")]
7693 UNSPEC_ST4_LANE))]
7694 "TARGET_SIMD"
7695 {
7696 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7697 INTVAL (operands[2]));
7698 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7699 }
7700 [(set_attr "type" "neon_store4_one_lane<q>")]
7701 )
7702
7703 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7704 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7705 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7706 UNSPEC_ST4))]
7707 "TARGET_SIMD"
7708 {
7709 if (BYTES_BIG_ENDIAN)
7710 {
7711 rtx tmp = gen_reg_rtx (<MODE>mode);
7712 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7713 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7714 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7715 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7716 }
7717 else
7718 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7719 DONE;
7720 })
7721
7722 (define_insn_and_split "aarch64_rev_reglist<mode>"
7723 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7724 (unspec:VSTRUCT_QD
7725 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7726 (match_operand:V16QI 2 "register_operand" "w")]
7727 UNSPEC_REV_REGLIST))]
7728 "TARGET_SIMD"
7729 "#"
7730 "&& reload_completed"
7731 [(const_int 0)]
7732 {
7733 int i;
7734 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7735 for (i = 0; i < nregs; i++)
7736 {
7737 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7738 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7739 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7740 }
7741 DONE;
7742 }
7743 [(set_attr "type" "neon_tbl1_q")
7744 (set_attr "length" "<insn_count>")]
7745 )
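
;; E.g. for a two-register (2Q) list the split above emits two TBL
;; instructions, one per 128-bit register, each permuting its bytes with
;; the mask built by aarch64_reverse_mask.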
7746
7747 ;; Reload patterns for AdvSIMD register list operands.
7748
7749 (define_expand "mov<mode>"
7750 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7751 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7752 "TARGET_FLOAT"
7753 {
7754 if (can_create_pseudo_p ())
7755 {
7756 if (GET_CODE (operands[0]) != REG)
7757 operands[1] = force_reg (<MODE>mode, operands[1]);
7758 }
7759 })
7760
7761 (define_expand "mov<mode>"
7762 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7763 (match_operand:VSTRUCT 1 "general_operand"))]
7764 "TARGET_FLOAT"
7765 {
7766 if (can_create_pseudo_p ())
7767 {
7768 if (GET_CODE (operands[0]) != REG)
7769 operands[1] = force_reg (<MODE>mode, operands[1]);
7770 }
7771 })
7772
7773 (define_expand "movv8di"
7774 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7775 (match_operand:V8DI 1 "general_operand"))]
7776 ""
7777 {
7778 if (can_create_pseudo_p () && MEM_P (operands[0]))
7779 operands[1] = force_reg (V8DImode, operands[1]);
7780 })
7781
7782 (define_expand "aarch64_ld1x3<vstruct_elt>"
7783 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7784 (match_operand:DI 1 "register_operand")]
7785 "TARGET_SIMD"
7786 {
7787 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7788 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7789 DONE;
7790 })
7791
7792 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7793 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7794 (unspec:VSTRUCT_3QD
7795 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7796 UNSPEC_LD1))]
7797 "TARGET_SIMD"
7798 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7799 [(set_attr "type" "neon_load1_3reg<q>")]
7800 )
7801
7802 (define_expand "aarch64_ld1x4<vstruct_elt>"
7803 [(match_operand:VSTRUCT_4QD 0 "register_operand")
7804 (match_operand:DI 1 "register_operand")]
7805 "TARGET_SIMD"
7806 {
7807 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7808 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7809 DONE;
7810 })
7811
7812 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7813 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7814 (unspec:VSTRUCT_4QD
7815 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7816 UNSPEC_LD1))]
7817 "TARGET_SIMD"
7818 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7819 [(set_attr "type" "neon_load1_4reg<q>")]
7820 )
7821
7822 (define_expand "aarch64_st1x2<vstruct_elt>"
7823 [(match_operand:DI 0 "register_operand")
7824 (match_operand:VSTRUCT_2QD 1 "register_operand")]
7825 "TARGET_SIMD"
7826 {
7827 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7828 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7829 DONE;
7830 })
7831
7832 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7833 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7834 (unspec:VSTRUCT_2QD
7835 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7836 UNSPEC_ST1))]
7837 "TARGET_SIMD"
7838 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7839 [(set_attr "type" "neon_store1_2reg<q>")]
7840 )
7841
7842 (define_expand "aarch64_st1x3<vstruct_elt>"
7843 [(match_operand:DI 0 "register_operand")
7844 (match_operand:VSTRUCT_3QD 1 "register_operand")]
7845 "TARGET_SIMD"
7846 {
7847 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7848 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7849 DONE;
7850 })
7851
7852 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7853 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7854 (unspec:VSTRUCT_3QD
7855 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7856 UNSPEC_ST1))]
7857 "TARGET_SIMD"
7858 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7859 [(set_attr "type" "neon_store1_3reg<q>")]
7860 )
7861
7862 (define_expand "aarch64_st1x4<vstruct_elt>"
7863 [(match_operand:DI 0 "register_operand")
7864 (match_operand:VSTRUCT_4QD 1 "register_operand")]
7865 "TARGET_SIMD"
7866 {
7867 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7868 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7869 DONE;
7870 })
7871
7872 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7873 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7874 (unspec:VSTRUCT_4QD
7875 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7876 UNSPEC_ST1))]
7877 "TARGET_SIMD"
7878 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7879 [(set_attr "type" "neon_store1_4reg<q>")]
7880 )
7881
7882 (define_insn "*aarch64_mov<mode>"
7883 [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7884 (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7885 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7886 && (register_operand (operands[0], <MODE>mode)
7887 || register_operand (operands[1], <MODE>mode))"
7888 "@
7889 #
7890 st1\\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7891 ld1\\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1"
7892 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7893 neon_load<nregs>_<nregs>reg_q")
7894 (set_attr "length" "<insn_count>,4,4")]
7895 )
7896
7897 (define_insn "*aarch64_mov<mode>"
7898 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7899 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7900 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7901 && (register_operand (operands[0], <MODE>mode)
7902 || register_operand (operands[1], <MODE>mode))"
7903 "@
7904 #
7905 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
7906 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
7907 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7908 neon_load<nregs>_<nregs>reg_q")
7909 (set_attr "length" "<insn_count>,4,4")]
7910 )
7911
7912 (define_insn "*aarch64_movv8di"
7913 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7914 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7915 "(register_operand (operands[0], V8DImode)
7916 || register_operand (operands[1], V8DImode))"
7917 "#"
7918 [(set_attr "type" "multiple,multiple,multiple")
7919 (set_attr "length" "32,16,16")]
7920 )
7921
7922 (define_insn "aarch64_be_ld1<mode>"
7923 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
7924 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7925 "aarch64_simd_struct_operand" "Utv")]
7926 UNSPEC_LD1))]
7927 "TARGET_SIMD"
7928 "ld1\\t{%0<Vmtype>}, %1"
7929 [(set_attr "type" "neon_load1_1reg<q>")]
7930 )
7931
7932 (define_insn "aarch64_be_st1<mode>"
7933 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7934 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7935 UNSPEC_ST1))]
7936 "TARGET_SIMD"
7937 "st1\\t{%1<Vmtype>}, %0"
7938 [(set_attr "type" "neon_store1_1reg<q>")]
7939 )
7940
7941 (define_insn "*aarch64_be_mov<mode>"
7942 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
7943 (match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
7944 "TARGET_FLOAT
7945 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7946 && (register_operand (operands[0], <MODE>mode)
7947 || register_operand (operands[1], <MODE>mode))"
7948 "@
7949 #
7950 stp\\t%d1, %R1, %0
7951 ldp\\t%d0, %R0, %1"
7952 [(set_attr "type" "multiple,neon_stp,neon_ldp")
7953 (set_attr "length" "8,4,4")]
7954 )
7955
7956 (define_insn "*aarch64_be_mov<mode>"
7957 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
7958 (match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
7959 "TARGET_FLOAT
7960 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7961 && (register_operand (operands[0], <MODE>mode)
7962 || register_operand (operands[1], <MODE>mode))"
7963 "@
7964 #
7965 stp\\t%q1, %R1, %0
7966 ldp\\t%q0, %R0, %1"
7967 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7968 (set_attr "arch" "simd,*,*")
7969 (set_attr "length" "8,4,4")]
7970 )
7971
7972 (define_insn "*aarch64_be_movoi"
7973 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
7974 (match_operand:OI 1 "general_operand" " w,w,m"))]
7975 "TARGET_FLOAT
7976 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7977 && (register_operand (operands[0], OImode)
7978 || register_operand (operands[1], OImode))"
7979 "@
7980 #
7981 stp\\t%q1, %R1, %0
7982 ldp\\t%q0, %R0, %1"
7983 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7984 (set_attr "arch" "simd,*,*")
7985 (set_attr "length" "8,4,4")]
7986 )
7987
7988 (define_insn "*aarch64_be_mov<mode>"
7989 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
7990 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
7991 "TARGET_FLOAT
7992 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7993 && (register_operand (operands[0], <MODE>mode)
7994 || register_operand (operands[1], <MODE>mode))"
7995 "#"
7996 [(set_attr "type" "multiple")
7997 (set_attr "arch" "fp<q>,*,*")
7998 (set_attr "length" "12,8,8")]
7999 )
8000
8001 (define_insn "*aarch64_be_movci"
8002 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
8003 (match_operand:CI 1 "general_operand" " w,w,o"))]
8004 "TARGET_FLOAT
8005 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8006 && (register_operand (operands[0], CImode)
8007 || register_operand (operands[1], CImode))"
8008 "#"
8009 [(set_attr "type" "multiple")
8010 (set_attr "arch" "simd,*,*")
8011 (set_attr "length" "12,8,8")]
8012 )
8013
8014 (define_insn "*aarch64_be_mov<mode>"
8015 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
8016 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
8017 "TARGET_FLOAT
8018 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8019 && (register_operand (operands[0], <MODE>mode)
8020 || register_operand (operands[1], <MODE>mode))"
8021 "#"
8022 [(set_attr "type" "multiple")
8023 (set_attr "arch" "fp<q>,*,*")
8024 (set_attr "length" "16,8,8")]
8025 )
8026
8027 (define_insn "*aarch64_be_movxi"
8028 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
8029 (match_operand:XI 1 "general_operand" " w,w,o"))]
8030 "TARGET_FLOAT
8031 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8032 && (register_operand (operands[0], XImode)
8033 || register_operand (operands[1], XImode))"
8034 "#"
8035 [(set_attr "type" "multiple")
8036 (set_attr "arch" "simd,*,*")
8037 (set_attr "length" "16,8,8")]
8038 )
8039
8040 (define_split
8041 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
8042 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
8043 "TARGET_FLOAT && reload_completed"
8044 [(const_int 0)]
8045 {
8046 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
8047 DONE;
8048 })
8049
8050 (define_split
8051 [(set (match_operand:OI 0 "register_operand")
8052 (match_operand:OI 1 "register_operand"))]
8053 "TARGET_FLOAT && reload_completed"
8054 [(const_int 0)]
8055 {
8056 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
8057 DONE;
8058 })
8059
8060 (define_split
8061 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
8062 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
8063 "TARGET_FLOAT && reload_completed"
8064 [(const_int 0)]
8065 {
8066 if (register_operand (operands[0], <MODE>mode)
8067 && register_operand (operands[1], <MODE>mode))
8068 {
8069 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
8070 DONE;
8071 }
8072 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8073 {
8074 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8075 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8076 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8077 <MODE>mode, 0),
8078 simplify_gen_subreg (pair_mode, operands[1],
8079 <MODE>mode, 0));
8080 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8081 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8082 operands[0],
8083 <MODE>mode,
8084 2 * elt_size)),
8085 gen_lowpart (<VSTRUCT_ELT>mode,
8086 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8087 operands[1],
8088 <MODE>mode,
8089 2 * elt_size)));
8090 DONE;
8091 }
8092 else
8093 FAIL;
8094 })
8095
8096 (define_split
8097 [(set (match_operand:CI 0 "nonimmediate_operand")
8098 (match_operand:CI 1 "general_operand"))]
8099 "TARGET_FLOAT && reload_completed"
8100 [(const_int 0)]
8101 {
8102 if (register_operand (operands[0], CImode)
8103 && register_operand (operands[1], CImode))
8104 {
8105 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
8106 DONE;
8107 }
8108 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8109 {
8110 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
8111 simplify_gen_subreg (OImode, operands[1], CImode, 0));
8112 emit_move_insn (gen_lowpart (V16QImode,
8113 simplify_gen_subreg (TImode, operands[0],
8114 CImode, 32)),
8115 gen_lowpart (V16QImode,
8116 simplify_gen_subreg (TImode, operands[1],
8117 CImode, 32)));
8118 DONE;
8119 }
8120 else
8121 FAIL;
8122 })
8123
8124 (define_split
8125 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
8126 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
8127 "TARGET_FLOAT && reload_completed"
8128 [(const_int 0)]
8129 {
8130 if (register_operand (operands[0], <MODE>mode)
8131 && register_operand (operands[1], <MODE>mode))
8132 {
8133 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
8134 DONE;
8135 }
8136 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8137 {
8138 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8139 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8140 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8141 <MODE>mode, 0),
8142 simplify_gen_subreg (pair_mode, operands[1],
8143 <MODE>mode, 0));
8144 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8145 <MODE>mode, 2 * elt_size),
8146 simplify_gen_subreg (pair_mode, operands[1],
8147 <MODE>mode, 2 * elt_size));
8148 DONE;
8149 }
8150 else
8151 FAIL;
8152 })
8153
8154 (define_split
8155 [(set (match_operand:XI 0 "nonimmediate_operand")
8156 (match_operand:XI 1 "general_operand"))]
8157 "TARGET_FLOAT && reload_completed"
8158 [(const_int 0)]
8159 {
8160 if (register_operand (operands[0], XImode)
8161 && register_operand (operands[1], XImode))
8162 {
8163 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
8164 DONE;
8165 }
8166 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8167 {
8168 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
8169 simplify_gen_subreg (OImode, operands[1], XImode, 0));
8170 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
8171 simplify_gen_subreg (OImode, operands[1], XImode, 32));
8172 DONE;
8173 }
8174 else
8175 FAIL;
8176 })
8177
8178 (define_split
8179 [(set (match_operand:V8DI 0 "nonimmediate_operand")
8180 (match_operand:V8DI 1 "general_operand"))]
8181 "reload_completed"
8182 [(const_int 0)]
8183 {
8184 if (register_operand (operands[0], V8DImode)
8185 && register_operand (operands[1], V8DImode))
8186 {
8187 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
8188 DONE;
8189 }
8190 else if ((register_operand (operands[0], V8DImode)
8191 && memory_operand (operands[1], V8DImode))
8192 || (memory_operand (operands[0], V8DImode)
8193 && register_operand (operands[1], V8DImode)))
8194 {
8195 for (int offset = 0; offset < 64; offset += 16)
8196 emit_move_insn (simplify_gen_subreg (TImode, operands[0],
8197 V8DImode, offset),
8198 simplify_gen_subreg (TImode, operands[1],
8199 V8DImode, offset));
8200 DONE;
8201 }
8202 else
8203 FAIL;
8204 })
8205
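;; The LD<n>R expander below describes its memory operand as BLKmode and
;; records only the accessed size: the instruction loads one element per
;; destination register (<nregs> elements in total) and replicates it to
;; every lane, so no full vector-structure mode describes the access.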
8206 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8207 [(match_operand:VSTRUCT_QD 0 "register_operand")
8208 (match_operand:DI 1 "register_operand")]
8209 "TARGET_SIMD"
8210 {
8211 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8212 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8213
8214 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
8215 DONE;
8216 })
8217
8218 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8219 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
8220 (unspec:VSTRUCT_2DNX [
8221 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
8222 UNSPEC_LD2_DREG))]
8223 "TARGET_SIMD"
8224 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8225 [(set_attr "type" "neon_load2_2reg<q>")]
8226 )
8227
8228 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8229 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
8230 (unspec:VSTRUCT_2DX [
8231 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
8232 UNSPEC_LD2_DREG))]
8233 "TARGET_SIMD"
8234 "ld1\\t{%S0.1d - %T0.1d}, %1"
8235 [(set_attr "type" "neon_load1_2reg<q>")]
8236 )
8237
8238 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8239 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
8240 (unspec:VSTRUCT_3DNX [
8241 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
8242 UNSPEC_LD3_DREG))]
8243 "TARGET_SIMD"
8244 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8245 [(set_attr "type" "neon_load3_3reg<q>")]
8246 )
8247
8248 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8249 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
8250 (unspec:VSTRUCT_3DX [
8251 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
8252 UNSPEC_LD3_DREG))]
8253 "TARGET_SIMD"
8254 "ld1\\t{%S0.1d - %U0.1d}, %1"
8255 [(set_attr "type" "neon_load1_3reg<q>")]
8256 )
8257
8258 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8259 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
8260 (unspec:VSTRUCT_4DNX [
8261 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
8262 UNSPEC_LD4_DREG))]
8263 "TARGET_SIMD"
8264 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8265 [(set_attr "type" "neon_load4_4reg<q>")]
8266 )
8267
8268 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8269 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
8270 (unspec:VSTRUCT_4DX [
8271 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
8272 UNSPEC_LD4_DREG))]
8273 "TARGET_SIMD"
8274 "ld1\\t{%S0.1d - %V0.1d}, %1"
8275 [(set_attr "type" "neon_load1_4reg<q>")]
8276 )
8277
8278 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8279 [(match_operand:VSTRUCT_D 0 "register_operand")
8280 (match_operand:DI 1 "register_operand")]
8281 "TARGET_SIMD"
8282 {
8283 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8284 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
8285 DONE;
8286 })
8287
8288 (define_expand "aarch64_ld1<VALL_F16:mode>"
8289 [(match_operand:VALL_F16 0 "register_operand")
8290 (match_operand:DI 1 "register_operand")]
8291 "TARGET_SIMD"
8292 {
8293 machine_mode mode = <VALL_F16:MODE>mode;
8294 rtx mem = gen_rtx_MEM (mode, operands[1]);
8295
8296 if (BYTES_BIG_ENDIAN)
8297 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
8298 else
8299 emit_move_insn (operands[0], mem);
8300 DONE;
8301 })
8302
8303 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8304 [(match_operand:VSTRUCT_Q 0 "register_operand")
8305 (match_operand:DI 1 "register_operand")]
8306 "TARGET_SIMD"
8307 {
8308 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8309 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
8310 DONE;
8311 })
8312
8313 (define_expand "aarch64_ld1x2<vstruct_elt>"
8314 [(match_operand:VSTRUCT_2QD 0 "register_operand")
8315 (match_operand:DI 1 "register_operand")]
8316 "TARGET_SIMD"
8317 {
8318 machine_mode mode = <MODE>mode;
8319 rtx mem = gen_rtx_MEM (mode, operands[1]);
8320
8321 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
8322 DONE;
8323 })
8324
8325 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
8326 [(match_operand:VSTRUCT_QD 0 "register_operand")
8327 (match_operand:DI 1 "register_operand")
8328 (match_operand:VSTRUCT_QD 2 "register_operand")
8329 (match_operand:SI 3 "immediate_operand")]
8330 "TARGET_SIMD"
8331 {
8332 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8333 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8334
8335 aarch64_simd_lane_bounds (operands[3], 0,
8336 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8337 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
8338 mem, operands[2], operands[3]));
8339 DONE;
8340 })
8341
8342 ;; Permuted-store expanders for neon intrinsics.
8343
8344 ;; Permute instructions
8345
8346 ;; vec_perm support
8347
8348 (define_expand "vec_perm<mode>"
8349 [(match_operand:VB 0 "register_operand")
8350 (match_operand:VB 1 "register_operand")
8351 (match_operand:VB 2 "register_operand")
8352 (match_operand:VB 3 "register_operand")]
8353 "TARGET_SIMD"
8354 {
8355 aarch64_expand_vec_perm (operands[0], operands[1],
8356 operands[2], operands[3], <nunits>);
8357 DONE;
8358 })
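
;; As an illustrative sketch (not part of the sources), a fully variable
;; two-input shuffle such as:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t
;;   shuffle (uint8x16_t a, uint8x16_t b, uint8x16_t sel)
;;   {
;;     return __builtin_shuffle (a, b, sel);
;;   }
;;
;; is routed through this expander; aarch64_expand_vec_perm concatenates
;; the two inputs into a register pair and selects bytes with a
;; two-register TBL.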
8359
8360 (define_insn "aarch64_qtbl1<mode>"
8361 [(set (match_operand:VB 0 "register_operand" "=w")
8362 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
8363 (match_operand:VB 2 "register_operand" "w")]
8364 UNSPEC_TBL))]
8365 "TARGET_SIMD"
8366 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
8367 [(set_attr "type" "neon_tbl1<q>")]
8368 )
8369
8370 (define_insn "aarch64_qtbx1<mode>"
8371 [(set (match_operand:VB 0 "register_operand" "=w")
8372 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8373 (match_operand:V16QI 2 "register_operand" "w")
8374 (match_operand:VB 3 "register_operand" "w")]
8375 UNSPEC_TBX))]
8376 "TARGET_SIMD"
8377 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
8378 [(set_attr "type" "neon_tbl1<q>")]
8379 )
8380
8381 ;; Two source registers.
8382
8383 (define_insn "aarch64_qtbl2<mode>"
8384 [(set (match_operand:VB 0 "register_operand" "=w")
8385 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
8386 (match_operand:VB 2 "register_operand" "w")]
8387 UNSPEC_TBL))]
8388 "TARGET_SIMD"
8389 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8390 [(set_attr "type" "neon_tbl2")]
8391 )
8392
8393 (define_insn "aarch64_qtbx2<mode>"
8394 [(set (match_operand:VB 0 "register_operand" "=w")
8395 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8396 (match_operand:V2x16QI 2 "register_operand" "w")
8397 (match_operand:VB 3 "register_operand" "w")]
8398 UNSPEC_TBX))]
8399 "TARGET_SIMD"
8400 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8401 [(set_attr "type" "neon_tbl2")]
8402 )
8403
8404 ;; Three source registers.
8405
8406 (define_insn "aarch64_qtbl3<mode>"
8407 [(set (match_operand:VB 0 "register_operand" "=w")
8408 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
8409 (match_operand:VB 2 "register_operand" "w")]
8410 UNSPEC_TBL))]
8411 "TARGET_SIMD"
8412 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8413 [(set_attr "type" "neon_tbl3")]
8414 )
8415
8416 (define_insn "aarch64_qtbx3<mode>"
8417 [(set (match_operand:VB 0 "register_operand" "=w")
8418 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8419 (match_operand:V3x16QI 2 "register_operand" "w")
8420 (match_operand:VB 3 "register_operand" "w")]
8421 UNSPEC_TBX))]
8422 "TARGET_SIMD"
8423 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8424 [(set_attr "type" "neon_tbl3")]
8425 )
8426
8427 ;; Four source registers.
8428
8429 (define_insn "aarch64_qtbl4<mode>"
8430 [(set (match_operand:VB 0 "register_operand" "=w")
8431 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8432 (match_operand:VB 2 "register_operand" "w")]
8433 UNSPEC_TBL))]
8434 "TARGET_SIMD"
8435 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8436 [(set_attr "type" "neon_tbl4")]
8437 )
8438
8439 (define_insn "aarch64_qtbx4<mode>"
8440 [(set (match_operand:VB 0 "register_operand" "=w")
8441 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8442 (match_operand:V4x16QI 2 "register_operand" "w")
8443 (match_operand:VB 3 "register_operand" "w")]
8444 UNSPEC_TBX))]
8445 "TARGET_SIMD"
8446 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8447 [(set_attr "type" "neon_tbl4")]
8448 )
8449
8450 (define_insn_and_split "aarch64_combinev16qi"
8451 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
8452 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
8453 (match_operand:V16QI 2 "register_operand" "w")]
8454 UNSPEC_CONCAT))]
8455 "TARGET_SIMD"
8456 "#"
8457 "&& reload_completed"
8458 [(const_int 0)]
8459 {
8460 aarch64_split_combinev16qi (operands);
8461 DONE;
8462 }
8463 [(set_attr "type" "multiple")]
8464 )
8465
8466 ;; This instruction's pattern is generated directly by
8467 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8468 ;; need corresponding changes there.
8469 (define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8470 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8471 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8472 (match_operand:VALL_F16 2 "register_operand" "w")]
8473 PERMUTE))]
8474 "TARGET_SIMD"
8475 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8476 [(set_attr "type" "neon_permute<q>")]
8477 )
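
;; A worked example: the constant permute {0, 4, 1, 5} on two V4SI inputs
;; interleaves the low halves, so aarch64_expand_vec_perm_const generates
;; this pattern with UNSPEC_ZIP1 and it assembles to
;; "zip1 v0.4s, v1.4s, v2.4s".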
8478
8479 ;; This instruction's pattern is generated directly by
8480 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8481 ;; need corresponding changes there. Note that the immediate (third)
8482 ;; operand is a lane index, not a byte index.
8483 (define_insn "aarch64_ext<mode>"
8484 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8485 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8486 (match_operand:VALL_F16 2 "register_operand" "w")
8487 (match_operand:SI 3 "immediate_operand" "i")]
8488 UNSPEC_EXT))]
8489 "TARGET_SIMD"
8490 {
8491 operands[3] = GEN_INT (INTVAL (operands[3])
8492 * GET_MODE_UNIT_SIZE (<MODE>mode));
8493 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
8494 }
8495 [(set_attr "type" "neon_ext<q>")]
8496 )
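
;; For example, GET_MODE_UNIT_SIZE of V4SImode is 4, so a lane index of 1
;; becomes the byte immediate #4 and the pattern assembles to
;; "ext v0.16b, v1.16b, v2.16b, #4", selecting 16 bytes of the
;; concatenated inputs starting at byte 4.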
8497
8498 ;; This instruction's pattern is generated directly by
8499 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8500 ;; need corresponding changes there.
8501 (define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8502 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8503 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
8504 REVERSE))]
8505 "TARGET_SIMD"
8506 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
8507 [(set_attr "type" "neon_rev<q>")]
8508 )
8509
8510 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8511 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
8512 (unspec:VSTRUCT_2DNX [
8513 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
8514 UNSPEC_ST2))]
8515 "TARGET_SIMD"
8516 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8517 [(set_attr "type" "neon_store2_2reg")]
8518 )
8519
8520 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8521 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
8522 (unspec:VSTRUCT_2DX [
8523 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
8524 UNSPEC_ST2))]
8525 "TARGET_SIMD"
8526 "st1\\t{%S1.1d - %T1.1d}, %0"
8527 [(set_attr "type" "neon_store1_2reg")]
8528 )
8529
8530 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8531 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
8532 (unspec:VSTRUCT_3DNX [
8533 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
8534 UNSPEC_ST3))]
8535 "TARGET_SIMD"
8536 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8537 [(set_attr "type" "neon_store3_3reg")]
8538 )
8539
8540 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8541 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
8542 (unspec:VSTRUCT_3DX [
8543 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
8544 UNSPEC_ST3))]
8545 "TARGET_SIMD"
8546 "st1\\t{%S1.1d - %U1.1d}, %0"
8547 [(set_attr "type" "neon_store1_3reg")]
8548 )
8549
8550 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8551 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
8552 (unspec:VSTRUCT_4DNX [
8553 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
8554 UNSPEC_ST4))]
8555 "TARGET_SIMD"
8556 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8557 [(set_attr "type" "neon_store4_4reg")]
8558 )
8559
8560 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8561 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
8562 (unspec:VSTRUCT_4DX [
8563 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
8564 UNSPEC_ST4))]
8565 "TARGET_SIMD"
8566 "st1\\t{%S1.1d - %V1.1d}, %0"
8567 [(set_attr "type" "neon_store1_4reg")]
8568 )
8569
8570 (define_expand "aarch64_st<nregs><vstruct_elt>"
8571 [(match_operand:DI 0 "register_operand")
8572 (match_operand:VSTRUCT_D 1 "register_operand")]
8573 "TARGET_SIMD"
8574 {
8575 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8576 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
8577 DONE;
8578 })
8579
8580 (define_expand "aarch64_st<nregs><vstruct_elt>"
8581 [(match_operand:DI 0 "register_operand")
8582 (match_operand:VSTRUCT_Q 1 "register_operand")]
8583 "TARGET_SIMD"
8584 {
8585 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8586 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
8587 DONE;
8588 })
8589
8590 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
8591 [(match_operand:DI 0 "register_operand")
8592 (match_operand:VSTRUCT_QD 1 "register_operand")
8593 (match_operand:SI 2 "immediate_operand")]
8594 "TARGET_SIMD"
8595 {
8596 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
8597 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8598
8599 aarch64_simd_lane_bounds (operands[2], 0,
8600 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8601 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8602 operands[1], operands[2]));
8603 DONE;
8604 })
8605
8606 (define_expand "aarch64_st1<VALL_F16:mode>"
8607 [(match_operand:DI 0 "register_operand")
8608 (match_operand:VALL_F16 1 "register_operand")]
8609 "TARGET_SIMD"
8610 {
8611 machine_mode mode = <VALL_F16:MODE>mode;
8612 rtx mem = gen_rtx_MEM (mode, operands[0]);
8613
8614 if (BYTES_BIG_ENDIAN)
8615 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
8616 else
8617 emit_move_insn (mem, operands[1]);
8618 DONE;
8619 })
8620
8621 ;; Standard pattern name vec_init<mode><Vel>.
8622
8623 (define_expand "vec_init<mode><Vel>"
8624 [(match_operand:VALL_F16 0 "register_operand")
8625 (match_operand 1 "" "")]
8626 "TARGET_SIMD"
8627 {
8628 aarch64_expand_vector_init (operands[0], operands[1]);
8629 DONE;
8630 })
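
;; A hedged illustration: a vector constructor such as
;;
;;   #include <arm_neon.h>
;;   int32x4_t
;;   make (int32_t a, int32_t b, int32_t c, int32_t d)
;;   {
;;     return (int32x4_t) {a, b, c, d};
;;   }
;;
;; reaches this expander, and aarch64_expand_vector_init typically builds
;; the vector with an initial DUP followed by INS of the remaining lanes.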
8631
8632 (define_expand "vec_init<mode><Vhalf>"
8633 [(match_operand:VQ_NO2E 0 "register_operand")
8634 (match_operand 1 "" "")]
8635 "TARGET_SIMD"
8636 {
8637 aarch64_expand_vector_init (operands[0], operands[1]);
8638 DONE;
8639 })
8640
8641 (define_insn "*aarch64_simd_ld1r<mode>"
8642 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8643 (vec_duplicate:VALL_F16
8644 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
8645 "TARGET_SIMD"
8646 "ld1r\\t{%0.<Vtype>}, %1"
8647 [(set_attr "type" "neon_load1_all_lanes")]
8648 )
8649
8650 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8651 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
8652 (unspec:VSTRUCT_2QD [
8653 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
8654 UNSPEC_LD1))]
8655 "TARGET_SIMD"
8656 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8657 [(set_attr "type" "neon_load1_2reg<q>")]
8658 )
8659
8660
8661 (define_insn "@aarch64_frecpe<mode>"
8662 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8663 (unspec:VHSDF_HSDF
8664 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8665 UNSPEC_FRECPE))]
8666 "TARGET_SIMD"
8667 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8668 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8669 )
8670
8671 (define_insn "aarch64_frecpx<mode>"
8672 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8673 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8674 UNSPEC_FRECPX))]
8675 "TARGET_SIMD"
8676 "frecpx\t%<s>0, %<s>1"
8677 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8678 )
8679
8680 (define_insn "@aarch64_frecps<mode>"
8681 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8682 (unspec:VHSDF_HSDF
8683 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8684 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8685 UNSPEC_FRECPS))]
8686 "TARGET_SIMD"
8687 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8688 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8689 )
8690
8691 (define_insn "aarch64_urecpe<mode>"
8692 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8693 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8694 UNSPEC_URECPE))]
8695 "TARGET_SIMD"
8696 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8697 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8698
8699 ;; Standard pattern name vec_extract<mode><Vel>.
8700
8701 (define_expand "vec_extract<mode><Vel>"
8702 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8703 (match_operand:VALL_F16 1 "register_operand")
8704 (match_operand:SI 2 "immediate_operand")]
8705 "TARGET_SIMD"
8706 {
8707 emit_insn
8708 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8709 DONE;
8710 })
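
;; For instance, vgetq_lane_s32 (v, 2) is expanded through this pattern
;; and typically assembles to "umov w0, v0.s[2]".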
8711
8712 ;; Extract a 64-bit vector from one half of a 128-bit vector.
8713 (define_expand "vec_extract<mode><Vhalf>"
8714 [(match_operand:<VHALF> 0 "register_operand")
8715 (match_operand:VQMOV_NO2E 1 "register_operand")
8716 (match_operand 2 "immediate_operand")]
8717 "TARGET_SIMD"
8718 {
8719 int start = INTVAL (operands[2]);
8720 gcc_assert (start == 0 || start == 1);
8721 start *= <nunits> / 2;
8722 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8723 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8724 DONE;
8725 })
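
;; Worked example: for V8HImode, <nunits> is 8, so operand 2 == 1 gives
;; start = 4 and the stepped parallel [4 5 6 7], i.e. the high half of
;; the 128-bit input.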
8726
8727 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8728 (define_expand "vec_extract<mode><V1half>"
8729 [(match_operand:<V1HALF> 0 "register_operand")
8730 (match_operand:VQ_2E 1 "register_operand")
8731 (match_operand 2 "immediate_operand")]
8732 "TARGET_SIMD"
8733 {
8734 /* V1DI and V1DF are rarely used by other patterns, so it is better
8735 to hide them in a subreg destination of a normal DI or DF op. */
8736 rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
8737 emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
8738 DONE;
8739 })
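
;; E.g. for V2DImode, <V1HALF> is V1DI and <VHALF> is DI, so the V1DI
;; result is written through a DI subreg and the standard DI vec_extract
;; expander above does the work.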
8740
8741 ;; aes
8742
8743 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8744 [(set (match_operand:V16QI 0 "register_operand" "=w")
8745 (unspec:V16QI
8746 [(xor:V16QI
8747 (match_operand:V16QI 1 "register_operand" "%0")
8748 (match_operand:V16QI 2 "register_operand" "w"))]
8749 CRYPTO_AES))]
8750 "TARGET_AES"
8751 "aes<aes_op>\\t%0.16b, %2.16b"
8752 [(set_attr "type" "crypto_aese")]
8753 )
8754
8755 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8756 [(set (match_operand:V16QI 0 "register_operand" "=w")
8757 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8758 CRYPTO_AESMC))]
8759 "TARGET_AES"
8760 "aes<aesmc_op>\\t%0.16b, %1.16b"
8761 [(set_attr "type" "crypto_aesmc")]
8762 )
8763
8764 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8765 ;; and enforce the register dependency without scheduling or register
8766 ;; allocation messing up the order or introducing moves in between.
8767 ;; Mash the two together during combine.
8768
8769 (define_insn "*aarch64_crypto_aese_fused"
8770 [(set (match_operand:V16QI 0 "register_operand" "=w")
8771 (unspec:V16QI
8772 [(unspec:V16QI
8773 [(xor:V16QI
8774 (match_operand:V16QI 1 "register_operand" "%0")
8775 (match_operand:V16QI 2 "register_operand" "w"))]
8776 UNSPEC_AESE)]
8777 UNSPEC_AESMC))]
8778 "TARGET_AES
8779 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8780 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8781 [(set_attr "type" "crypto_aese")
8782 (set_attr "length" "8")]
8783 )
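
;; A hedged usage sketch: with the "aes" extension enabled, back-to-back
;; rounds written as
;;
;;   #include <arm_neon.h>
;;   uint8x16_t
;;   round (uint8x16_t data, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (data, key));
;;   }
;;
;; combine into this fused pattern on cores that enable
;; AARCH64_FUSE_AES_AESMC, keeping AESE and AESMC adjacent.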
8784
8785 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8786 ;; and enforce the register dependency without scheduling or register
8787 ;; allocation messing up the order or introducing moves in between.
8788 ;; Mash the two together during combine.
8789
8790 (define_insn "*aarch64_crypto_aesd_fused"
8791 [(set (match_operand:V16QI 0 "register_operand" "=w")
8792 (unspec:V16QI
8793 [(unspec:V16QI
8794 [(xor:V16QI
8795 (match_operand:V16QI 1 "register_operand" "%0")
8796 (match_operand:V16QI 2 "register_operand" "w"))]
8797 UNSPEC_AESD)]
8798 UNSPEC_AESIMC))]
8799 "TARGET_AES
8800 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8801 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8802 [(set_attr "type" "crypto_aese")
8803 (set_attr "length" "8")]
8804 )
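
;; The decryption analogue of the sketch above,
;; vaesimcq_u8 (vaesdq_u8 (data, key)), combines into this pattern.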
8805
8806 ;; sha1
8807
8808 (define_insn "aarch64_crypto_sha1hsi"
8809 [(set (match_operand:SI 0 "register_operand" "=w")
8810 (unspec:SI [(match_operand:SI 1
8811 "register_operand" "w")]
8812 UNSPEC_SHA1H))]
8813 "TARGET_SHA2"
8814 "sha1h\\t%s0, %s1"
8815 [(set_attr "type" "crypto_sha1_fast")]
8816 )
8817
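;; The two V4SI patterns below describe SHA1H reading the low 32 bits of
;; the input register; that element is RTL lane 0 on little-endian but
;; lane 3 on big-endian, hence the separate variants.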
8818 (define_insn "aarch64_crypto_sha1hv4si"
8819 [(set (match_operand:SI 0 "register_operand" "=w")
8820 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8821 (parallel [(const_int 0)]))]
8822 UNSPEC_SHA1H))]
8823 "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8824 "sha1h\\t%s0, %s1"
8825 [(set_attr "type" "crypto_sha1_fast")]
8826 )
8827
8828 (define_insn "aarch64_be_crypto_sha1hv4si"
8829 [(set (match_operand:SI 0 "register_operand" "=w")
8830 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8831 (parallel [(const_int 3)]))]
8832 UNSPEC_SHA1H))]
8833 "TARGET_SHA2 && BYTES_BIG_ENDIAN"
8834 "sha1h\\t%s0, %s1"
8835 [(set_attr "type" "crypto_sha1_fast")]
8836 )
8837
8838 (define_insn "aarch64_crypto_sha1su1v4si"
8839 [(set (match_operand:V4SI 0 "register_operand" "=w")
8840 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8841 (match_operand:V4SI 2 "register_operand" "w")]
8842 UNSPEC_SHA1SU1))]
8843 "TARGET_SHA2"
8844 "sha1su1\\t%0.4s, %2.4s"
8845 [(set_attr "type" "crypto_sha1_fast")]
8846 )
8847
8848 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8849 [(set (match_operand:V4SI 0 "register_operand" "=w")
8850 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8851 (match_operand:SI 2 "register_operand" "w")
8852 (match_operand:V4SI 3 "register_operand" "w")]
8853 CRYPTO_SHA1))]
8854 "TARGET_SHA2"
8855 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8856 [(set_attr "type" "crypto_sha1_slow")]
8857 )
8858
8859 (define_insn "aarch64_crypto_sha1su0v4si"
8860 [(set (match_operand:V4SI 0 "register_operand" "=w")
8861 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8862 (match_operand:V4SI 2 "register_operand" "w")
8863 (match_operand:V4SI 3 "register_operand" "w")]
8864 UNSPEC_SHA1SU0))]
8865 "TARGET_SHA2"
8866 "sha1su0\\t%0.4s, %2.4s, %3.4s"
8867 [(set_attr "type" "crypto_sha1_xor")]
8868 )
8869
8870 ;; sha256
8871
8872 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8873 [(set (match_operand:V4SI 0 "register_operand" "=w")
8874 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8875 (match_operand:V4SI 2 "register_operand" "w")
8876 (match_operand:V4SI 3 "register_operand" "w")]
8877 CRYPTO_SHA256))]
8878 "TARGET_SHA2"
8879 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8880 [(set_attr "type" "crypto_sha256_slow")]
8881 )
8882
8883 (define_insn "aarch64_crypto_sha256su0v4si"
8884 [(set (match_operand:V4SI 0 "register_operand" "=w")
8885 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8886 (match_operand:V4SI 2 "register_operand" "w")]
8887 UNSPEC_SHA256SU0))]
8888 "TARGET_SHA2"
8889 "sha256su0\\t%0.4s, %2.4s"
8890 [(set_attr "type" "crypto_sha256_fast")]
8891 )
8892
8893 (define_insn "aarch64_crypto_sha256su1v4si"
8894 [(set (match_operand:V4SI 0 "register_operand" "=w")
8895 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8896 (match_operand:V4SI 2 "register_operand" "w")
8897 (match_operand:V4SI 3 "register_operand" "w")]
8898 UNSPEC_SHA256SU1))]
8899 "TARGET_SHA2"
8900 "sha256su1\\t%0.4s, %2.4s, %3.4s"
8901 [(set_attr "type" "crypto_sha256_slow")]
8902 )
8903
8904 ;; sha512
8905
8906 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8907 [(set (match_operand:V2DI 0 "register_operand" "=w")
8908 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8909 (match_operand:V2DI 2 "register_operand" "w")
8910 (match_operand:V2DI 3 "register_operand" "w")]
8911 CRYPTO_SHA512))]
8912 "TARGET_SHA3"
8913 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8914 [(set_attr "type" "crypto_sha512")]
8915 )
8916
8917 (define_insn "aarch64_crypto_sha512su0qv2di"
8918 [(set (match_operand:V2DI 0 "register_operand" "=w")
8919 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8920 (match_operand:V2DI 2 "register_operand" "w")]
8921 UNSPEC_SHA512SU0))]
8922 "TARGET_SHA3"
8923 "sha512su0\\t%0.2d, %2.2d"
8924 [(set_attr "type" "crypto_sha512")]
8925 )
8926
8927 (define_insn "aarch64_crypto_sha512su1qv2di"
8928 [(set (match_operand:V2DI 0 "register_operand" "=w")
8929 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8930 (match_operand:V2DI 2 "register_operand" "w")
8931 (match_operand:V2DI 3 "register_operand" "w")]
8932 UNSPEC_SHA512SU1))]
8933 "TARGET_SHA3"
8934 "sha512su1\\t%0.2d, %2.2d, %3.2d"
8935 [(set_attr "type" "crypto_sha512")]
8936 )
8937
8938 ;; sha3
8939
8940 (define_insn "eor3q<mode>4"
8941 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8942 (xor:VQ_I
8943 (xor:VQ_I
8944 (match_operand:VQ_I 2 "register_operand" "w")
8945 (match_operand:VQ_I 3 "register_operand" "w"))
8946 (match_operand:VQ_I 1 "register_operand" "w")))]
8947 "TARGET_SHA3"
8948 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
8949 [(set_attr "type" "crypto_sha3")]
8950 )
8951
8952 (define_insn "aarch64_rax1qv2di"
8953 [(set (match_operand:V2DI 0 "register_operand" "=w")
8954 (xor:V2DI
8955 (rotate:V2DI
8956 (match_operand:V2DI 2 "register_operand" "w")
8957 (const_int 1))
8958 (match_operand:V2DI 1 "register_operand" "w")))]
8959 "TARGET_SHA3"
8960 "rax1\\t%0.2d, %1.2d, %2.2d"
8961 [(set_attr "type" "crypto_sha3")]
8962 )
8963
8964 (define_insn "aarch64_xarqv2di"
8965 [(set (match_operand:V2DI 0 "register_operand" "=w")
8966 (rotatert:V2DI
8967 (xor:V2DI
8968 (match_operand:V2DI 1 "register_operand" "%w")
8969 (match_operand:V2DI 2 "register_operand" "w"))
8970 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
8971 "TARGET_SHA3"
8972 "xar\\t%0.2d, %1.2d, %2.2d, %3"
8973 [(set_attr "type" "crypto_sha3")]
8974 )
8975
8976 (define_insn "bcaxq<mode>4"
8977 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8978 (xor:VQ_I
8979 (and:VQ_I
8980 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
8981 (match_operand:VQ_I 2 "register_operand" "w"))
8982 (match_operand:VQ_I 1 "register_operand" "w")))]
8983 "TARGET_SHA3"
8984 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
8985 [(set_attr "type" "crypto_sha3")]
8986 )
8987
8988 ;; SM3
8989
8990 (define_insn "aarch64_sm3ss1qv4si"
8991 [(set (match_operand:V4SI 0 "register_operand" "=w")
8992 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8993 (match_operand:V4SI 2 "register_operand" "w")
8994 (match_operand:V4SI 3 "register_operand" "w")]
8995 UNSPEC_SM3SS1))]
8996 "TARGET_SM4"
8997 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
8998 [(set_attr "type" "crypto_sm3")]
8999 )
9000
9001
9002 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9003 [(set (match_operand:V4SI 0 "register_operand" "=w")
9004 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9005 (match_operand:V4SI 2 "register_operand" "w")
9006 (match_operand:V4SI 3 "register_operand" "w")
9007 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
9008 CRYPTO_SM3TT))]
9009 "TARGET_SM4"
9010 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
9011 [(set_attr "type" "crypto_sm3")]
9012 )
9013
9014 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9015 [(set (match_operand:V4SI 0 "register_operand" "=w")
9016 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9017 (match_operand:V4SI 2 "register_operand" "w")
9018 (match_operand:V4SI 3 "register_operand" "w")]
9019 CRYPTO_SM3PART))]
9020 "TARGET_SM4"
9021 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
9022 [(set_attr "type" "crypto_sm3")]
9023 )
9024
9025 ;; SM4
9026
9027 (define_insn "aarch64_sm4eqv4si"
9028 [(set (match_operand:V4SI 0 "register_operand" "=w")
9029 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9030 (match_operand:V4SI 2 "register_operand" "w")]
9031 UNSPEC_SM4E))]
9032 "TARGET_SM4"
9033 "sm4e\\t%0.4s, %2.4s"
9034 [(set_attr "type" "crypto_sm4")]
9035 )
9036
9037 (define_insn "aarch64_sm4ekeyqv4si"
9038 [(set (match_operand:V4SI 0 "register_operand" "=w")
9039 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9040 (match_operand:V4SI 2 "register_operand" "w")]
9041 UNSPEC_SM4EKEY))]
9042 "TARGET_SM4"
9043 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
9044 [(set_attr "type" "crypto_sm4")]
9045 )
9046
9047 ;; fp16fml
9048
9049 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9050 [(set (match_operand:VDQSF 0 "register_operand")
9051 (unspec:VDQSF
9052 [(match_operand:VDQSF 1 "register_operand")
9053 (match_operand:<VFMLA_W> 2 "register_operand")
9054 (match_operand:<VFMLA_W> 3 "register_operand")]
9055 VFMLA16_LOW))]
9056 "TARGET_F16FML"
9057 {
9058 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9059 <nunits> * 2, false);
9060 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9061 <nunits> * 2, false);
9062
9063 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
9064 operands[1],
9065 operands[2],
9066 operands[3],
9067 p1, p2));
9068 DONE;
9069
9070 })
9071
9072 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9073 [(set (match_operand:VDQSF 0 "register_operand")
9074 (unspec:VDQSF
9075 [(match_operand:VDQSF 1 "register_operand")
9076 (match_operand:<VFMLA_W> 2 "register_operand")
9077 (match_operand:<VFMLA_W> 3 "register_operand")]
9078 VFMLA16_HIGH))]
9079 "TARGET_F16FML"
9080 {
9081 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9082 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9083
9084 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
9085 operands[1],
9086 operands[2],
9087 operands[3],
9088 p1, p2));
9089 DONE;
9090 })
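
;; A hedged note on the intrinsic mapping: these expanders back the
;; vfmlal_low_f16 / vfmlal_high_f16 intrinsic families (and the fmlsl
;; variants). The "_low" forms widen the bottom half of the 16-bit input
;; elements and the "_high" forms (FMLAL2/FMLSL2) the top half, selected
;; here by the vect_par_cnst_lo_half / vect_par_cnst_hi_half parallels.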
9091
9092 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9093 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9094 (fma:VDQSF
9095 (float_extend:VDQSF
9096 (vec_select:<VFMLA_SEL_W>
9097 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9098 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
9099 (float_extend:VDQSF
9100 (vec_select:<VFMLA_SEL_W>
9101 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9102 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9103 (match_operand:VDQSF 1 "register_operand" "0")))]
9104 "TARGET_F16FML"
9105 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9106 [(set_attr "type" "neon_fp_mul_s")]
9107 )
9108
9109 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9110 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9111 (fma:VDQSF
9112 (float_extend:VDQSF
9113 (neg:<VFMLA_SEL_W>
9114 (vec_select:<VFMLA_SEL_W>
9115 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9116 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
9117 (float_extend:VDQSF
9118 (vec_select:<VFMLA_SEL_W>
9119 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9120 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9121 (match_operand:VDQSF 1 "register_operand" "0")))]
9122 "TARGET_F16FML"
9123 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9124 [(set_attr "type" "neon_fp_mul_s")]
9125 )
9126
9127 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9128 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9129 (fma:VDQSF
9130 (float_extend:VDQSF
9131 (vec_select:<VFMLA_SEL_W>
9132 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9133 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
9134 (float_extend:VDQSF
9135 (vec_select:<VFMLA_SEL_W>
9136 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9137 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9138 (match_operand:VDQSF 1 "register_operand" "0")))]
9139 "TARGET_F16FML"
9140 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9141 [(set_attr "type" "neon_fp_mul_s")]
9142 )
9143
9144 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9145 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9146 (fma:VDQSF
9147 (float_extend:VDQSF
9148 (neg:<VFMLA_SEL_W>
9149 (vec_select:<VFMLA_SEL_W>
9150 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9151 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
9152 (float_extend:VDQSF
9153 (vec_select:<VFMLA_SEL_W>
9154 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9155 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9156 (match_operand:VDQSF 1 "register_operand" "0")))]
9157 "TARGET_F16FML"
9158 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9159 [(set_attr "type" "neon_fp_mul_s")]
9160 )
9161
9162 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9163 [(set (match_operand:V2SF 0 "register_operand")
9164 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9165 (match_operand:V4HF 2 "register_operand")
9166 (match_operand:V4HF 3 "register_operand")
9167 (match_operand:SI 4 "aarch64_imm2")]
9168 VFMLA16_LOW))]
9169 "TARGET_F16FML"
9170 {
9171 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9172 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9173
9174 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
9175 operands[1],
9176 operands[2],
9177 operands[3],
9178 p1, lane));
9179 DONE;
9180 }
9181 )
9182
9183 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9184 [(set (match_operand:V2SF 0 "register_operand")
9185 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9186 (match_operand:V4HF 2 "register_operand")
9187 (match_operand:V4HF 3 "register_operand")
9188 (match_operand:SI 4 "aarch64_imm2")]
9189 VFMLA16_HIGH))]
9190 "TARGET_F16FML"
9191 {
9192 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9193 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9194
9195 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
9196 operands[1],
9197 operands[2],
9198 operands[3],
9199 p1, lane));
9200 DONE;
9201 })
9202
9203 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9204 [(set (match_operand:V2SF 0 "register_operand" "=w")
9205 (fma:V2SF
9206 (float_extend:V2SF
9207 (vec_select:V2HF
9208 (match_operand:V4HF 2 "register_operand" "w")
9209 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9210 (float_extend:V2SF
9211 (vec_duplicate:V2HF
9212 (vec_select:HF
9213 (match_operand:V4HF 3 "register_operand" "x")
9214 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9215 (match_operand:V2SF 1 "register_operand" "0")))]
9216 "TARGET_F16FML"
9217 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9218 [(set_attr "type" "neon_fp_mul_s")]
9219 )
9220
9221 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9222 [(set (match_operand:V2SF 0 "register_operand" "=w")
9223 (fma:V2SF
9224 (float_extend:V2SF
9225 (neg:V2HF
9226 (vec_select:V2HF
9227 (match_operand:V4HF 2 "register_operand" "w")
9228 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9229 (float_extend:V2SF
9230 (vec_duplicate:V2HF
9231 (vec_select:HF
9232 (match_operand:V4HF 3 "register_operand" "x")
9233 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9234 (match_operand:V2SF 1 "register_operand" "0")))]
9235 "TARGET_F16FML"
9236 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9237 [(set_attr "type" "neon_fp_mul_s")]
9238 )
9239
9240 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9241 [(set (match_operand:V2SF 0 "register_operand" "=w")
9242 (fma:V2SF
9243 (float_extend:V2SF
9244 (vec_select:V2HF
9245 (match_operand:V4HF 2 "register_operand" "w")
9246 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9247 (float_extend:V2SF
9248 (vec_duplicate:V2HF
9249 (vec_select:HF
9250 (match_operand:V4HF 3 "register_operand" "x")
9251 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9252 (match_operand:V2SF 1 "register_operand" "0")))]
9253 "TARGET_F16FML"
9254 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9255 [(set_attr "type" "neon_fp_mul_s")]
9256 )
9257
9258 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9259 [(set (match_operand:V2SF 0 "register_operand" "=w")
9260 (fma:V2SF
9261 (float_extend:V2SF
9262 (neg:V2HF
9263 (vec_select:V2HF
9264 (match_operand:V4HF 2 "register_operand" "w")
9265 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9266 (float_extend:V2SF
9267 (vec_duplicate:V2HF
9268 (vec_select:HF
9269 (match_operand:V4HF 3 "register_operand" "x")
9270 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9271 (match_operand:V2SF 1 "register_operand" "0")))]
9272 "TARGET_F16FML"
9273 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9274 [(set_attr "type" "neon_fp_mul_s")]
9275 )
9276
9277 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
9278 [(set (match_operand:V4SF 0 "register_operand")
9279 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9280 (match_operand:V8HF 2 "register_operand")
9281 (match_operand:V8HF 3 "register_operand")
9282 (match_operand:SI 4 "aarch64_lane_imm3")]
9283 VFMLA16_LOW))]
9284 "TARGET_F16FML"
9285 {
9286 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9287 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9288
9289 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
9290 operands[1],
9291 operands[2],
9292 operands[3],
9293 p1, lane));
9294 DONE;
9295 })
9296
9297 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
9298 [(set (match_operand:V4SF 0 "register_operand")
9299 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9300 (match_operand:V8HF 2 "register_operand")
9301 (match_operand:V8HF 3 "register_operand")
9302 (match_operand:SI 4 "aarch64_lane_imm3")]
9303 VFMLA16_HIGH))]
9304 "TARGET_F16FML"
9305 {
9306 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9307 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9308
9309 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
9310 operands[1],
9311 operands[2],
9312 operands[3],
9313 p1, lane));
9314 DONE;
9315 })
9316
9317 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
9318 [(set (match_operand:V4SF 0 "register_operand" "=w")
9319 (fma:V4SF
9320 (float_extend:V4SF
9321 (vec_select:V4HF
9322 (match_operand:V8HF 2 "register_operand" "w")
9323 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9324 (float_extend:V4SF
9325 (vec_duplicate:V4HF
9326 (vec_select:HF
9327 (match_operand:V8HF 3 "register_operand" "x")
9328 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9329 (match_operand:V4SF 1 "register_operand" "0")))]
9330 "TARGET_F16FML"
9331 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9332 [(set_attr "type" "neon_fp_mul_s")]
9333 )
9334
9335 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
9336 [(set (match_operand:V4SF 0 "register_operand" "=w")
9337 (fma:V4SF
9338 (float_extend:V4SF
9339 (neg:V4HF
9340 (vec_select:V4HF
9341 (match_operand:V8HF 2 "register_operand" "w")
9342 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9343 (float_extend:V4SF
9344 (vec_duplicate:V4HF
9345 (vec_select:HF
9346 (match_operand:V8HF 3 "register_operand" "x")
9347 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9348 (match_operand:V4SF 1 "register_operand" "0")))]
9349 "TARGET_F16FML"
9350 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9351 [(set_attr "type" "neon_fp_mul_s")]
9352 )
9353
9354 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
9355 [(set (match_operand:V4SF 0 "register_operand" "=w")
9356 (fma:V4SF
9357 (float_extend:V4SF
9358 (vec_select:V4HF
9359 (match_operand:V8HF 2 "register_operand" "w")
9360 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9361 (float_extend:V4SF
9362 (vec_duplicate:V4HF
9363 (vec_select:HF
9364 (match_operand:V8HF 3 "register_operand" "x")
9365 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9366 (match_operand:V4SF 1 "register_operand" "0")))]
9367 "TARGET_F16FML"
9368 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9369 [(set_attr "type" "neon_fp_mul_s")]
9370 )
9371
9372 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
9373 [(set (match_operand:V4SF 0 "register_operand" "=w")
9374 (fma:V4SF
9375 (float_extend:V4SF
9376 (neg:V4HF
9377 (vec_select:V4HF
9378 (match_operand:V8HF 2 "register_operand" "w")
9379 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9380 (float_extend:V4SF
9381 (vec_duplicate:V4HF
9382 (vec_select:HF
9383 (match_operand:V8HF 3 "register_operand" "x")
9384 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9385 (match_operand:V4SF 1 "register_operand" "0")))]
9386 "TARGET_F16FML"
9387 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9388 [(set_attr "type" "neon_fp_mul_s")]
9389 )
9390
9391 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
9392 [(set (match_operand:V2SF 0 "register_operand")
9393 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9394 (match_operand:V4HF 2 "register_operand")
9395 (match_operand:V8HF 3 "register_operand")
9396 (match_operand:SI 4 "aarch64_lane_imm3")]
9397 VFMLA16_LOW))]
9398 "TARGET_F16FML"
9399 {
9400 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9401 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9402
9403 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
9404 operands[1],
9405 operands[2],
9406 operands[3],
9407 p1, lane));
9408 DONE;
9409
9410 })
9411
9412 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
9413 [(set (match_operand:V2SF 0 "register_operand")
9414 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9415 (match_operand:V4HF 2 "register_operand")
9416 (match_operand:V8HF 3 "register_operand")
9417 (match_operand:SI 4 "aarch64_lane_imm3")]
9418 VFMLA16_HIGH))]
9419 "TARGET_F16FML"
9420 {
9421 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9422 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9423
9424 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
9425 operands[1],
9426 operands[2],
9427 operands[3],
9428 p1, lane));
9429 DONE;
9430
9431 })
9432
9433 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
9434 [(set (match_operand:V2SF 0 "register_operand" "=w")
9435 (fma:V2SF
9436 (float_extend:V2SF
9437 (vec_select:V2HF
9438 (match_operand:V4HF 2 "register_operand" "w")
9439 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9440 (float_extend:V2SF
9441 (vec_duplicate:V2HF
9442 (vec_select:HF
9443 (match_operand:V8HF 3 "register_operand" "x")
9444 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9445 (match_operand:V2SF 1 "register_operand" "0")))]
9446 "TARGET_F16FML"
9447 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9448 [(set_attr "type" "neon_fp_mul_s")]
9449 )
9450
9451 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
9452 [(set (match_operand:V2SF 0 "register_operand" "=w")
9453 (fma:V2SF
9454 (float_extend:V2SF
9455 (neg:V2HF
9456 (vec_select:V2HF
9457 (match_operand:V4HF 2 "register_operand" "w")
9458 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9459 (float_extend:V2SF
9460 (vec_duplicate:V2HF
9461 (vec_select:HF
9462 (match_operand:V8HF 3 "register_operand" "x")
9463 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9464 (match_operand:V2SF 1 "register_operand" "0")))]
9465 "TARGET_F16FML"
9466 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9467 [(set_attr "type" "neon_fp_mul_s")]
9468 )
9469
9470 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
9471 [(set (match_operand:V2SF 0 "register_operand" "=w")
9472 (fma:V2SF
9473 (float_extend:V2SF
9474 (vec_select:V2HF
9475 (match_operand:V4HF 2 "register_operand" "w")
9476 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9477 (float_extend:V2SF
9478 (vec_duplicate:V2HF
9479 (vec_select:HF
9480 (match_operand:V8HF 3 "register_operand" "x")
9481 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9482 (match_operand:V2SF 1 "register_operand" "0")))]
9483 "TARGET_F16FML"
9484 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9485 [(set_attr "type" "neon_fp_mul_s")]
9486 )
9487
9488 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
9489 [(set (match_operand:V2SF 0 "register_operand" "=w")
9490 (fma:V2SF
9491 (float_extend:V2SF
9492 (neg:V2HF
9493 (vec_select:V2HF
9494 (match_operand:V4HF 2 "register_operand" "w")
9495 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9496 (float_extend:V2SF
9497 (vec_duplicate:V2HF
9498 (vec_select:HF
9499 (match_operand:V8HF 3 "register_operand" "x")
9500 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9501 (match_operand:V2SF 1 "register_operand" "0")))]
9502 "TARGET_F16FML"
9503 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9504 [(set_attr "type" "neon_fp_mul_s")]
9505 )
9506
9507 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
9508 [(set (match_operand:V4SF 0 "register_operand")
9509 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9510 (match_operand:V8HF 2 "register_operand")
9511 (match_operand:V4HF 3 "register_operand")
9512 (match_operand:SI 4 "aarch64_imm2")]
9513 VFMLA16_LOW))]
9514 "TARGET_F16FML"
9515 {
9516 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9517 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9518
9519 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
9520 operands[1],
9521 operands[2],
9522 operands[3],
9523 p1, lane));
9524 DONE;
9525 })
9526
9527 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
9528 [(set (match_operand:V4SF 0 "register_operand")
9529 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9530 (match_operand:V8HF 2 "register_operand")
9531 (match_operand:V4HF 3 "register_operand")
9532 (match_operand:SI 4 "aarch64_imm2")]
9533 VFMLA16_HIGH))]
9534 "TARGET_F16FML"
9535 {
9536 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9537 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9538
9539 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
9540 operands[1],
9541 operands[2],
9542 operands[3],
9543 p1, lane));
9544 DONE;
9545 })
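
;; In the two expanders above, aarch64_simd_vect_par_cnst_half builds the
;; parallel that the insn's vect_par_cnst_lo_half/vect_par_cnst_hi_half
;; predicate later checks; for the low half of V8HF on little-endian this
;; should be
;;
;;   (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])
;;
;; with the two halves swapped on big-endian.  aarch64_endian_lane_rtx
;; likewise remaps the architectural lane number in operand 4 so that the
;; .h[%5] index printed by the matching insn is correct for either
;; endianness.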
9546
9547 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
9548 [(set (match_operand:V4SF 0 "register_operand" "=w")
9549 (fma:V4SF
9550 (float_extend:V4SF
9551 (vec_select:V4HF
9552 (match_operand:V8HF 2 "register_operand" "w")
9553 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9554 (float_extend:V4SF
9555 (vec_duplicate:V4HF
9556 (vec_select:HF
9557 (match_operand:V4HF 3 "register_operand" "x")
9558 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9559 (match_operand:V4SF 1 "register_operand" "0")))]
9560 "TARGET_F16FML"
9561 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9562 [(set_attr "type" "neon_fp_mul_s")]
9563 )
9564
9565 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
9566 [(set (match_operand:V4SF 0 "register_operand" "=w")
9567 (fma:V4SF
9568 (float_extend:V4SF
9569 (neg:V4HF
9570 (vec_select:V4HF
9571 (match_operand:V8HF 2 "register_operand" "w")
9572 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9573 (float_extend:V4SF
9574 (vec_duplicate:V4HF
9575 (vec_select:HF
9576 (match_operand:V4HF 3 "register_operand" "x")
9577 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9578 (match_operand:V4SF 1 "register_operand" "0")))]
9579 "TARGET_F16FML"
9580 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9581 [(set_attr "type" "neon_fp_mul_s")]
9582 )
9583
9584 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
9585 [(set (match_operand:V4SF 0 "register_operand" "=w")
9586 (fma:V4SF
9587 (float_extend:V4SF
9588 (vec_select:V4HF
9589 (match_operand:V8HF 2 "register_operand" "w")
9590 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9591 (float_extend:V4SF
9592 (vec_duplicate:V4HF
9593 (vec_select:HF
9594 (match_operand:V4HF 3 "register_operand" "x")
9595 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9596 (match_operand:V4SF 1 "register_operand" "0")))]
9597 "TARGET_F16FML"
9598 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9599 [(set_attr "type" "neon_fp_mul_s")]
9600 )
9601
9602 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
9603 [(set (match_operand:V4SF 0 "register_operand" "=w")
9604 (fma:V4SF
9605 (float_extend:V4SF
9606 (neg:V4HF
9607 (vec_select:V4HF
9608 (match_operand:V8HF 2 "register_operand" "w")
9609 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9610 (float_extend:V4SF
9611 (vec_duplicate:V4HF
9612 (vec_select:HF
9613 (match_operand:V4HF 3 "register_operand" "x")
9614 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9615 (match_operand:V4SF 1 "register_operand" "0")))]
9616 "TARGET_F16FML"
9617 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9618 [(set_attr "type" "neon_fp_mul_s")]
9619 )
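
;; These q_lane patterns correspond to the ACLE intrinsics
;; vfmlalq_lane_low_f16, vfmlslq_lane_low_f16, vfmlalq_lane_high_f16 and
;; vfmlslq_lane_high_f16.  A minimal sketch (assuming
;; -march=armv8.2-a+fp16fml):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   acc_q_low (float32x4_t r, float16x8_t a, float16x4_t b)
;;   {
;;     /* fmlal v0.4s, v1.4h, v2.h[3]: widen the low four halves of A,
;;        multiply each by lane 3 of B and accumulate into R.  */
;;     return vfmlalq_lane_low_f16 (r, a, b, 3);
;;   }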
9620
9621 ;; pmull
9622
9623 (define_insn "aarch64_crypto_pmulldi"
9624 [(set (match_operand:TI 0 "register_operand" "=w")
9625 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
9626 (match_operand:DI 2 "register_operand" "w")]
9627 UNSPEC_PMULL))]
9628 "TARGET_AES"
9629 "pmull\\t%0.1q, %1.1d, %2.1d"
9630 [(set_attr "type" "crypto_pmull")]
9631 )
9632
9633 (define_insn "aarch64_crypto_pmullv2di"
9634 [(set (match_operand:TI 0 "register_operand" "=w")
9635 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
9636 (match_operand:V2DI 2 "register_operand" "w")]
9637 UNSPEC_PMULL2))]
9638 "TARGET_AES"
9639 "pmull2\\t%0.1q, %1.2d, %2.2d"
9640 [(set_attr "type" "crypto_pmull")]
9641 )
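
;; The two patterns above back the ACLE carry-less multiply intrinsics.
;; A minimal sketch (assuming -march=armv8-a+aes):
;;
;;   #include <arm_neon.h>
;;
;;   poly128_t
;;   clmul_lo (poly64x2_t a, poly64x2_t b)
;;   {
;;     /* pmull v0.1q, v1.1d, v2.1d on the low halves ...  */
;;     return vmull_p64 (vgetq_lane_p64 (a, 0), vgetq_lane_p64 (b, 0));
;;   }
;;
;;   poly128_t
;;   clmul_hi (poly64x2_t a, poly64x2_t b)
;;   {
;;     /* ... and pmull2 v0.1q, v1.2d, v2.2d on the high halves.  */
;;     return vmull_high_p64 (a, b);
;;   }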
9642
9643 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
9644 (define_insn "<optab><Vnarrowq><mode>2"
9645 [(set (match_operand:VQN 0 "register_operand" "=w")
9646 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9647 "TARGET_SIMD"
9648 "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
9649 [(set_attr "type" "neon_shift_imm_long")]
9650 )
9651
9652 (define_expand "aarch64_<su>xtl<mode>"
9653 [(set (match_operand:VQN 0 "register_operand" "=w")
9654 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9655 "TARGET_SIMD"
9656 ""
9657 )
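
;; The extend insn serves the vectorizer's widening conversions, while
;; the expander above gives the intrinsics a single-mode entry point; a
;; minimal sketch of C code that should match it:
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   widen (int16x4_t a)
;;   {
;;     return vmovl_s16 (a);    /* sxtl v0.4s, v1.4h */
;;   }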
9658
9659 ;; Truncate a 128-bit integer vector to a 64-bit vector.
9660 (define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
9661 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9662 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9663 "TARGET_SIMD"
9664 "xtn\t%0.<Vntype>, %1.<Vtype>"
9665 [(set_attr "type" "neon_move_narrow_q")]
9666 )
9667
9668 ;; Expander for the intrinsic, which takes only one mode, unlike the
9669 ;; two-mode trunc optab.
9670 (define_expand "aarch64_xtn<mode>"
9671 [(set (match_operand:<VNARROWQ> 0 "register_operand")
9672 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
9673 "TARGET_SIMD"
9674 {}
9675 )
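
;; A minimal sketch of C code that should match the trunc pattern via the
;; intrinsic expander (or via the two-mode optab when vectorizing):
;;
;;   #include <arm_neon.h>
;;
;;   int16x4_t
;;   narrow (int32x4_t a)
;;   {
;;     return vmovn_s32 (a);    /* xtn v0.4h, v1.4s */
;;   }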
9676
9677 (define_insn "aarch64_bfdot<mode>"
9678 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9679 (plus:VDQSF
9680 (unspec:VDQSF
9681 [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9682 (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9683 UNSPEC_BFDOT)
9684 (match_operand:VDQSF 1 "register_operand" "0")))]
9685 "TARGET_BF16_SIMD"
9686 "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9687 [(set_attr "type" "neon_dot<q>")]
9688 )
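
;; BFDOT accumulates a 2-way bfloat16 dot product into each float lane.
;; A minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   bfdot (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
;;   {
;;     /* r[i] += a[2i] * b[2i] + a[2i+1] * b[2i+1], computed in float.  */
;;     return vbfdot_f32 (r, a, b);
;;   }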
9689
9690 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9691 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9692 (plus:VDQSF
9693 (unspec:VDQSF
9694 [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9695 (match_operand:VBF 3 "register_operand" "w")
9696 (match_operand:SI 4 "const_int_operand" "n")]
9697 UNSPEC_BFDOT)
9698 (match_operand:VDQSF 1 "register_operand" "0")))]
9699 "TARGET_BF16_SIMD"
9700 {
9701 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9702 int lane = INTVAL (operands[4]);
9703 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9704 return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9705 }
9706 [(set_attr "type" "neon_dot<VDQSF:q>")]
9707 )
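
;; The lane form selects one *pair* of bfloat16 elements from operand 3,
;; which is why the ENDIAN_LANE_N call above divides the element count by
;; two and why the operand is printed as .2h[%4].  A minimal sketch
;; (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   bfdot_lane1 (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
;;   {
;;     /* Every accumulator lane uses the pair b[2], b[3].  */
;;     return vbfdot_lane_f32 (r, a, b, 1);
;;   }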
9708
9709 ;; vget_low/high_bf16
9710 (define_expand "aarch64_vget_lo_halfv8bf"
9711 [(match_operand:V4BF 0 "register_operand")
9712 (match_operand:V8BF 1 "register_operand")]
9713 "TARGET_BF16_SIMD"
9714 {
9715 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
9716 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9717 DONE;
9718 })
9719
9720 (define_expand "aarch64_vget_hi_halfv8bf"
9721 [(match_operand:V4BF 0 "register_operand")
9722 (match_operand:V8BF 1 "register_operand")]
9723 "TARGET_BF16_SIMD"
9724 {
9725 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
9726 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9727 DONE;
9728 })
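
;; A minimal sketch of the corresponding intrinsics (assuming
;; -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   bfloat16x4_t low  (bfloat16x8_t a) { return vget_low_bf16 (a); }
;;   bfloat16x4_t high (bfloat16x8_t a) { return vget_high_bf16 (a); }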
9729
9730 ;; bfmmla
9731 (define_insn "aarch64_bfmmlaqv4sf"
9732 [(set (match_operand:V4SF 0 "register_operand" "=w")
9733 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9734 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9735 (match_operand:V8BF 3 "register_operand" "w")]
9736 UNSPEC_BFMMLA)))]
9737 "TARGET_BF16_SIMD"
9738 "bfmmla\\t%0.4s, %2.8h, %3.8h"
9739 [(set_attr "type" "neon_fp_mla_s_q")]
9740 )
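
;; BFMMLA multiplies a 2x4 bfloat16 matrix by a 4x2 bfloat16 matrix and
;; accumulates into the 2x2 float matrix held in the destination.  A
;; minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   mmla (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
;;   {
;;     return vbfmmlaq_f32 (r, a, b);   /* bfmmla v0.4s, v1.8h, v2.8h */
;;   }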
9741
9742 ;; bfmlal<bt>
9743 (define_insn "aarch64_bfmlal<bt>v4sf"
9744 [(set (match_operand:V4SF 0 "register_operand" "=w")
9745 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9746 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9747 (match_operand:V8BF 3 "register_operand" "w")]
9748 BF_MLA)))]
9749 "TARGET_BF16_SIMD"
9750 "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9751 [(set_attr "type" "neon_fp_mla_s_q")]
9752 )
9753
9754 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9755 [(set (match_operand:V4SF 0 "register_operand" "=w")
9756 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9757 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9758 (match_operand:VBF 3 "register_operand" "x")
9759 (match_operand:SI 4 "const_int_operand" "n")]
9760 BF_MLA)))]
9761 "TARGET_BF16_SIMD"
9762 {
9763 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9764 return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9765 }
9766 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9767 )
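
;; BFMLALB/BFMLALT widen the even-numbered (bottom) or odd-numbered (top)
;; bfloat16 elements and fused-multiply-accumulate them into the float
;; accumulator; the lane forms broadcast one element of operand 3.  A
;; minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   mlalb_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
;;   {
;;     /* bfmlalb v0.4s, v1.8h, v2.h[2] */
;;     return vbfmlalbq_lane_f32 (r, a, b, 2);
;;   }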
9768
9769 ;; 8-bit integer matrix multiply-accumulate
9770 (define_insn "aarch64_simd_<sur>mmlav16qi"
9771 [(set (match_operand:V4SI 0 "register_operand" "=w")
9772 (plus:V4SI
9773 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9774 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9775 (match_operand:V4SI 1 "register_operand" "0")))]
9776 "TARGET_I8MM"
9777 "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9778 [(set_attr "type" "neon_mla_s_q")]
9779 )
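
;; The MATMUL iterator covers the signed, unsigned and mixed-sign forms
;; (smmla, ummla, usmmla), i.e. the vmmlaq_s32, vmmlaq_u32 and
;; vusmmlaq_s32 intrinsics.  A minimal sketch (assuming
;; -march=armv8.2-a+i8mm):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   mmla_s8 (int32x4_t r, int8x16_t a, int8x16_t b)
;;   {
;;     return vmmlaq_s32 (r, a, b);   /* smmla v0.4s, v1.16b, v2.16b */
;;   }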
9780
9781 ;; bfcvtn
9782 (define_insn "aarch64_bfcvtn<q><mode>"
9783 [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9784 (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9785 UNSPEC_BFCVTN))]
9786 "TARGET_BF16_SIMD"
9787 "bfcvtn\\t%0.4h, %1.4s"
9788 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9789 )
9790
9791 (define_insn "aarch64_bfcvtn2v8bf"
9792 [(set (match_operand:V8BF 0 "register_operand" "=w")
9793 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9794 (match_operand:V4SF 2 "register_operand" "w")]
9795 UNSPEC_BFCVTN2))]
9796 "TARGET_BF16_SIMD"
9797 "bfcvtn2\\t%0.8h, %2.4s"
9798 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9799 )
9800
9801 (define_insn "aarch64_bfcvtbf"
9802 [(set (match_operand:BF 0 "register_operand" "=w")
9803 (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9804 UNSPEC_BFCVT))]
9805 "TARGET_BF16_FP"
9806 "bfcvt\\t%h0, %s1"
9807 [(set_attr "type" "f_cvt")]
9808 )
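
;; The three BFCVT patterns above implement the narrowing conversions
;; vcvt_bf16_f32/vcvtq_low_bf16_f32, vcvtq_high_bf16_f32 and the scalar
;; vcvth_bf16_f32.  A minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   bfloat16x8_t
;;   narrow_pair (float32x4_t lo, float32x4_t hi)
;;   {
;;     bfloat16x8_t t = vcvtq_low_bf16_f32 (lo);   /* bfcvtn  */
;;     return vcvtq_high_bf16_f32 (t, hi);         /* bfcvtn2 */
;;   }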
9809
9810 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
9811 (define_insn "aarch64_vbfcvt<mode>"
9812 [(set (match_operand:V4SF 0 "register_operand" "=w")
9813 (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
9814 UNSPEC_BFCVTN))]
9815 "TARGET_BF16_SIMD"
9816 "shll\\t%0.4s, %1.4h, #16"
9817 [(set_attr "type" "neon_shift_imm_long")]
9818 )
9819
9820 (define_insn "aarch64_vbfcvt_highv8bf"
9821 [(set (match_operand:V4SF 0 "register_operand" "=w")
9822 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
9823 UNSPEC_BFCVTN2))]
9824 "TARGET_BF16_SIMD"
9825 "shll2\\t%0.4s, %1.8h, #16"
9826 [(set_attr "type" "neon_shift_imm_long")]
9827 )
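
;; The shift works because bfloat16 is exactly the high half of an IEEE
;; binary32 encoding: widening each 16-bit lane and shifting it left by
;; 16 reproduces the float bit pattern with no rounding.  The scalar
;; aarch64_bfcvtsf below relies on the same identity.  A scalar C model
;; of what SHLL #16 computes per lane:
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;
;;   float
;;   bf16_to_f32 (uint16_t bf)
;;   {
;;     uint32_t bits = (uint32_t) bf << 16;
;;     float f;
;;     memcpy (&f, &bits, sizeof f);   /* bit-cast, no conversion */
;;     return f;
;;   }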
9828
9829 (define_insn "aarch64_bfcvtsf"
9830 [(set (match_operand:SF 0 "register_operand" "=w")
9831 (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
9832 UNSPEC_BFCVT))]
9833 "TARGET_BF16_FP"
9834 "shl\\t%d0, %d1, #16"
9835 [(set_attr "type" "neon_shift_imm")]
9836 )
9837 )