;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
        (match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
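
;; For illustration, storing a zero vector can take the exception above
;; and avoid materializing the zero in a SIMD register.  A minimal sketch
;; (hypothetical function name; exact codegen depends on tuning):
;;
;;   #include <arm_neon.h>
;;   void clear_vec (int32x4_t *p)
;;   {
;;     *p = vdupq_n_s32 (0);   /* may assemble to: stp xzr, xzr, [x0]  */
;;   }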

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
          "=w, m,  m,  w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
          "m,  Dz, w,  w,  w,  r,  r,  Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "ldr\t%d0, %1";
    case 1: return "str\txzr, %0";
    case 2: return "str\t%d1, %0";
    case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4: return "umov\t%0, %1.d[0]";
    case 5: return "fmov\t%d0, %1";
    case 6: return "mov\t%0, %1";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 64);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
          "=w, Umq, m,  w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
          "m,  Dz,  w,  w,  w,  r,  r,  Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
        (match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
        (match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }
    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
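
;; A scalar sketch of the identity used above: ctz (x) == clz (bitrev (x)),
;; where the full bit reversal is built from a byte reversal (REV, via the
;; bswap pattern) followed by a per-byte bit reversal (RBIT).  Hypothetical
;; helper; it assumes x != 0, since __builtin_clz (0) is undefined (the
;; vector CLZ instruction itself is well defined for zero):
;;
;;   #include <stdint.h>
;;   int ctz32 (uint32_t x)
;;   {
;;     x = __builtin_bswap32 (x);                             /* REV     */
;;     x = (x & 0xF0F0F0F0u) >> 4 | (x & 0x0F0F0F0Fu) << 4;   /* RBIT... */
;;     x = (x & 0xCCCCCCCCu) >> 2 | (x & 0x33333333u) << 2;
;;     x = (x & 0xAAAAAAAAu) >> 1 | (x & 0x55555555u) << 1;   /* per byte */
;;     return __builtin_clz (x);                              /* CLZ     */
;;   }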

(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
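
;; Scalar sketch of what the expansion above computes: flip the sign of
;; operand 1 wherever operand 2 is negative, i.e. x ^ (y & sign-bit mask).
;; Hypothetical helper for the 32-bit element case:
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float xorsignf (float x, float y)
;;   {
;;     uint32_t xi, yi;
;;     memcpy (&xi, &x, sizeof xi);
;;     memcpy (&yi, &y, sizeof yi);
;;     xi ^= yi & 0x80000000u;     /* HOST_WIDE_INT_M1U << 31, per lane */
;;     memcpy (&x, &xi, sizeof x);
;;     return x;
;;   }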

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                  DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                  DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
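
;; The same operation is reachable from C through the ACLE intrinsics
;; (with the dotprod extension enabled); a minimal sketch, assuming a
;; GCC 8-era arm_neon.h:
;;
;;   #include <arm_neon.h>
;;   /* Each 32-bit lane i of r accumulates the four-byte dot product
;;      a[4i]*b[4i] + ... + a[4i+3]*b[4i+3].  */
;;   int32x4_t dot_accum (int32x4_t r, int8x16_t a, int8x16_t b)
;;   {
;;     return vdotq_s32 (r, a, b);   /* a single sdot instruction */
;;   }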

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                  DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                  DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)
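
;; The BSL above selects the sign bit from operand 2 and the remaining
;; bits from operand 1.  A scalar sketch of the same bit-level selection
;; (hypothetical helper, 32-bit elements):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float copysignf_sketch (float x, float y)
;;   {
;;     uint32_t xi, yi, m = 0x80000000u;   /* the v_bitmask, per lane   */
;;     memcpy (&xi, &x, sizeof xi);
;;     memcpy (&yi, &y, sizeof yi);
;;     xi = (yi & m) | (xi & ~m);          /* sign from y, rest from x  */
;;     memcpy (&x, &xi, sizeof x);
;;     return x;
;;   }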

(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (abs:VDQ_BHSI (minus:VDQ_BHSI
                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
          (vec_duplicate:VDQ_BHSI
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
          (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
        return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
        gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)
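
;; Both paths of the expander are visible from GNU C vector code; a
;; minimal sketch (the codegen noted is what is expected, not guaranteed):
;;
;;   #include <arm_neon.h>
;;   int32x4_t shl_imm (int32x4_t x)        /* in-range immediate:      */
;;   {                                      /*   shl  v0.4s, v0.4s, #3  */
;;     return x << 3;
;;   }
;;   int32x4_t shl_var (int32x4_t x, int n) /* variable amount:         */
;;   {                                      /*   dup + sshl             */
;;     return x << n;
;;   }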

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:VDQ_I 2 "register_operand" "")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})
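
;; AdvSIMD has no right-shift-by-register instruction, hence the negation
;; above: SSHL with a negative count shifts right.  An equivalent
;; intrinsics sketch (hypothetical function name):
;;
;;   #include <arm_neon.h>
;;   int32x4_t ashr_var (int32x4_t x, int32x4_t n)
;;   {
;;     return vshlq_s32 (x, vnegq_s32 (n));   /* neg + sshl */
;;   }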

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the
       sign bit, just like an asr by 63; however, the standard pattern does
       not handle a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
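
;; Worked example of the remapping: for x = -8 (0xFF...F8), an asr by 63
;; yields -1, i.e. every bit a copy of the sign bit; an asr by 64 is
;; defined by the intrinsic to produce all sign bits as well, so rewriting
;; 64 -> 63 changes nothing observable.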

(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

;; For 64-bit modes we use shl/ushr, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
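
;; For example, on little-endian a 32-bit whole-vector shift of the V4HI
;; value {a, b, c, d} (a in lane 0) is "ushr d0, d1, #32" and produces
;; {c, d, 0, 0}: elements move towards lane 0 and zeros fill the top.
;; On big-endian, lane 0 sits at the most-significant end of the 64-bit
;; register, so the same lane movement needs SHL instead.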

(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "register_operand" "r,w"))
          (match_operand:V2DI 3 "register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
                                             GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_merge:VDQF_F16
          (vec_duplicate:VDQF_F16
            (match_operand:<VEL> 1 "register_operand" "w"))
          (match_operand:VDQF_F16 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                     (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
                                operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; and op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; This means that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand0.
;; For little-endian this is { low(operand0), operand1 }
;; For big-endian this is { operand1, low(operand0) }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQ 0 "register_operand" "")
   (match_operand:<VHALF> 1 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                                                        operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                                                     operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand" "")
   (match_operand:VDN 1 "register_operand" "")
   (match_operand:VDN 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})
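
;; The expander above corresponds roughly to the intrinsics sequence
;; below (a sketch for V4HI -> V8QI; hypothetical function name):
;;
;;   #include <arm_neon.h>
;;   int8x8_t pack (int16x4_t lo, int16x4_t hi)
;;   {
;;     /* vcombine plays the move_lo_quad/move_hi_quad pair; vmovn is
;;        the single xtn of the pattern above.  */
;;     return vmovn_s16 (vcombine_s16 (lo, hi));
;;   }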

;; For quads.

(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
    else
      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
  }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)
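
;; A shift-left-long by zero is how AdvSIMD spells sign/zero extension;
;; the equivalent intrinsics, as a sketch (hypothetical function names,
;; expected codegen noted in comments):
;;
;;   #include <arm_neon.h>
;;   int16x8_t unpack_lo (int8x16_t x)   /* sshll  v0.8h, v0.8b, #0   */
;;   {
;;     return vmovl_s8 (vget_low_s8 (x));
;;   }
;;   int16x8_t unpack_hi (int8x16_t x)   /* sshll2 v0.8h, v0.16b, #0  */
;;   {
;;     return vmovl_s8 (vget_high_s8 (x));
;;   }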

;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)
1689 ;; FP vector operations.
1690 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1691 ;; double-precision (64-bit) floating-point data types and arithmetic as
1692 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1693 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1694 ;;
1695 ;; Floating-point operations can raise an exception. Vectorizing such
1696 ;; operations is safe for the reasons explained below.
1697 ;;
1698 ;; ARMv8 permits an extension to enable trapped floating-point
1699 ;; exception handling; however, this is an optional feature. In the
1700 ;; event of a floating-point exception being raised by vectorised
1701 ;; code then:
1702 ;; 1. If trapped floating-point exceptions are available, then a trap
1703 ;; will be taken when any lane raises an enabled exception. A trap
1704 ;; handler may determine which lane raised the exception.
1705 ;; 2. Alternatively a sticky exception flag is set in the
1706 ;; floating-point status register (FPSR). Software may explicitly
1707 ;; test the exception flags, in which case the tests will either
1708 ;; prevent vectorisation, allowing precise identification of the
1709 ;; failing operation, or occur outside of vectorisable regions, in
1710 ;; which case the specific operation and lane are not of interest.
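;;
;; As an illustrative sketch (not part of the machine description): a
;; plain C loop such as the one below, compiled with -O2 -ftree-vectorize
;; and no fast-math options, may be vectorized using the fadd pattern
;; that follows.
;;
;;   void vadd (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] += b[i];   /* may become: fadd v0.4s, v0.4s, v1.4s */
;;   }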
1711
1712 ;; FP arithmetic operations.
1713
1714 (define_insn "add<mode>3"
1715 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1716 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1717 (match_operand:VHSDF 2 "register_operand" "w")))]
1718 "TARGET_SIMD"
1719 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1720 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1721 )
1722
1723 (define_insn "sub<mode>3"
1724 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1725 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1726 (match_operand:VHSDF 2 "register_operand" "w")))]
1727 "TARGET_SIMD"
1728 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1729 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1730 )
1731
1732 (define_insn "mul<mode>3"
1733 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1734 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1735 (match_operand:VHSDF 2 "register_operand" "w")))]
1736 "TARGET_SIMD"
1737 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1738 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1739 )
1740
1741 (define_expand "div<mode>3"
1742 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1743 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1744 (match_operand:VHSDF 2 "register_operand" "w")))]
1745 "TARGET_SIMD"
1746 {
1747 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1748 DONE;
1749
1750 operands[1] = force_reg (<MODE>mode, operands[1]);
1751 })
1752
1753 (define_insn "*div<mode>3"
1754 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1755 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1756 (match_operand:VHSDF 2 "register_operand" "w")))]
1757 "TARGET_SIMD"
1758 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1759 [(set_attr "type" "neon_fp_div_<stype><q>")]
1760 )
1761
1762 (define_insn "neg<mode>2"
1763 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1764 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1765 "TARGET_SIMD"
1766 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1767 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1768 )
1769
1770 (define_insn "abs<mode>2"
1771 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1772 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1773 "TARGET_SIMD"
1774 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1775 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1776 )
1777
1778 (define_insn "fma<mode>4"
1779 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1780 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1781 (match_operand:VHSDF 2 "register_operand" "w")
1782 (match_operand:VHSDF 3 "register_operand" "0")))]
1783 "TARGET_SIMD"
1784 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1785 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1786 )
1787
1788 (define_insn "*aarch64_fma4_elt<mode>"
1789 [(set (match_operand:VDQF 0 "register_operand" "=w")
1790 (fma:VDQF
1791 (vec_duplicate:VDQF
1792 (vec_select:<VEL>
1793 (match_operand:VDQF 1 "register_operand" "<h_con>")
1794 (parallel [(match_operand:SI 2 "immediate_operand")])))
1795 (match_operand:VDQF 3 "register_operand" "w")
1796 (match_operand:VDQF 4 "register_operand" "0")))]
1797 "TARGET_SIMD"
1798 {
1799 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1800 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1801 }
1802 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1803 )
1804
1805 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1806 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1807 (fma:VDQSF
1808 (vec_duplicate:VDQSF
1809 (vec_select:<VEL>
1810 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1811 (parallel [(match_operand:SI 2 "immediate_operand")])))
1812 (match_operand:VDQSF 3 "register_operand" "w")
1813 (match_operand:VDQSF 4 "register_operand" "0")))]
1814 "TARGET_SIMD"
1815 {
1816 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1817 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1818 }
1819 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1820 )
1821
1822 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1823 [(set (match_operand:VMUL 0 "register_operand" "=w")
1824 (fma:VMUL
1825 (vec_duplicate:VMUL
1826 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1827 (match_operand:VMUL 2 "register_operand" "w")
1828 (match_operand:VMUL 3 "register_operand" "0")))]
1829 "TARGET_SIMD"
1830 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1831 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1832 )
1833
1834 (define_insn "*aarch64_fma4_elt_to_64v2df"
1835 [(set (match_operand:DF 0 "register_operand" "=w")
1836 (fma:DF
1837 (vec_select:DF
1838 (match_operand:V2DF 1 "register_operand" "w")
1839 (parallel [(match_operand:SI 2 "immediate_operand")]))
1840 (match_operand:DF 3 "register_operand" "w")
1841 (match_operand:DF 4 "register_operand" "0")))]
1842 "TARGET_SIMD"
1843 {
1844 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1845 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1846 }
1847 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1848 )
1849
1850 (define_insn "fnma<mode>4"
1851 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1852 (fma:VHSDF
1853 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1854 (match_operand:VHSDF 2 "register_operand" "w")
1855 (match_operand:VHSDF 3 "register_operand" "0")))]
1856 "TARGET_SIMD"
1857 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1858 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1859 )
1860
1861 (define_insn "*aarch64_fnma4_elt<mode>"
1862 [(set (match_operand:VDQF 0 "register_operand" "=w")
1863 (fma:VDQF
1864 (neg:VDQF
1865 (match_operand:VDQF 3 "register_operand" "w"))
1866 (vec_duplicate:VDQF
1867 (vec_select:<VEL>
1868 (match_operand:VDQF 1 "register_operand" "<h_con>")
1869 (parallel [(match_operand:SI 2 "immediate_operand")])))
1870 (match_operand:VDQF 4 "register_operand" "0")))]
1871 "TARGET_SIMD"
1872 {
1873 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1874 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1875 }
1876 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1877 )
1878
1879 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1880 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1881 (fma:VDQSF
1882 (neg:VDQSF
1883 (match_operand:VDQSF 3 "register_operand" "w"))
1884 (vec_duplicate:VDQSF
1885 (vec_select:<VEL>
1886 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1887 (parallel [(match_operand:SI 2 "immediate_operand")])))
1888 (match_operand:VDQSF 4 "register_operand" "0")))]
1889 "TARGET_SIMD"
1890 {
1891 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1892 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1893 }
1894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1895 )
1896
1897 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1898 [(set (match_operand:VMUL 0 "register_operand" "=w")
1899 (fma:VMUL
1900 (neg:VMUL
1901 (match_operand:VMUL 2 "register_operand" "w"))
1902 (vec_duplicate:VMUL
1903 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1904 (match_operand:VMUL 3 "register_operand" "0")))]
1905 "TARGET_SIMD"
1906 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1907 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1908 )
1909
1910 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1911 [(set (match_operand:DF 0 "register_operand" "=w")
1912 (fma:DF
1913 (vec_select:DF
1914 (match_operand:V2DF 1 "register_operand" "w")
1915 (parallel [(match_operand:SI 2 "immediate_operand")]))
1916 (neg:DF
1917 (match_operand:DF 3 "register_operand" "w"))
1918 (match_operand:DF 4 "register_operand" "0")))]
1919 "TARGET_SIMD"
1920 {
1921 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1922 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1923 }
1924 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1925 )
1926
1927 ;; Vector versions of the floating-point frint patterns.
1928 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1929 (define_insn "<frint_pattern><mode>2"
1930 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1931 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1932 FRINT))]
1933 "TARGET_SIMD"
1934 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1935 [(set_attr "type" "neon_fp_round_<stype><q>")]
1936 )
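
;; For instance (an illustrative sketch, not normative): with
;; vectorization enabled, a loop over floorf may expand through the
;; floor<mode>2 standard name above and emit frintm.
;;
;;   #include <math.h>
;;   void vfloor (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = floorf (b[i]);   /* may become: frintm v0.4s, v1.4s */
;;   }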
1937
1938 ;; Vector versions of the fcvt standard patterns.
1939 ;; Expands to lbtrunc, lround, lceil, lfloor
1940 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1941 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1942 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1943 [(match_operand:VHSDF 1 "register_operand" "w")]
1944 FCVT)))]
1945 "TARGET_SIMD"
1946 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1947 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1948 )
1949
1950 ;; HF Scalar variants of related SIMD instructions.
1951 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1952 [(set (match_operand:HI 0 "register_operand" "=w")
1953 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1954 FCVT)))]
1955 "TARGET_SIMD_F16INST"
1956 "fcvt<frint_suffix><su>\t%h0, %h1"
1957 [(set_attr "type" "neon_fp_to_int_s")]
1958 )
1959
1960 (define_insn "<optab>_trunchfhi2"
1961 [(set (match_operand:HI 0 "register_operand" "=w")
1962 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1963 "TARGET_SIMD_F16INST"
1964 "fcvtz<su>\t%h0, %h1"
1965 [(set_attr "type" "neon_fp_to_int_s")]
1966 )
1967
1968 (define_insn "<optab>hihf2"
1969 [(set (match_operand:HF 0 "register_operand" "=w")
1970 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1971 "TARGET_SIMD_F16INST"
1972 "<su_optab>cvtf\t%h0, %h1"
1973 [(set_attr "type" "neon_int_to_fp_s")]
1974 )
1975
1976 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1977 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1978 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1979 [(mult:VDQF
1980 (match_operand:VDQF 1 "register_operand" "w")
1981 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1982 UNSPEC_FRINTZ)))]
1983 "TARGET_SIMD
1984 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1985 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1986 {
1987 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1988 char buf[64];
1989 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1990 output_asm_insn (buf, operands);
1991 return "";
1992 }
1993 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1994 )
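
;; A hedged example of the multiply-by-power-of-two fusion above: the
;; scale 16.0f is 2**4, so fbits is 4 and the multiply folds into the
;; immediate of the fcvtzs.
;;
;;   void to_q4 (int *restrict d, const float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (int) (s[i] * 16.0f);   /* may become: fcvtzs v0.4s, v1.4s, #4 */
;;   }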
1995
1996 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1997 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1998 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1999 [(match_operand:VHSDF 1 "register_operand")]
2000 UNSPEC_FRINTZ)))]
2001 "TARGET_SIMD"
2002 {})
2003
2004 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2005 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2006 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2007 [(match_operand:VHSDF 1 "register_operand")]
2008 UNSPEC_FRINTZ)))]
2009 "TARGET_SIMD"
2010 {})
2011
2012 (define_expand "ftrunc<VHSDF:mode>2"
2013 [(set (match_operand:VHSDF 0 "register_operand")
2014 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2015 UNSPEC_FRINTZ))]
2016 "TARGET_SIMD"
2017 {})
2018
2019 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2020 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2021 (FLOATUORS:VHSDF
2022 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2023 "TARGET_SIMD"
2024 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2025 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2026 )
2027
2028 ;; Conversions between vectors of floats and doubles.
2029 ;; Contains a mix of patterns to match standard pattern names
2030 ;; and those for intrinsics.
2031
2032 ;; Float widening operations.
2033
2034 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2035 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2036 (float_extend:<VWIDE> (vec_select:<VHALF>
2037 (match_operand:VQ_HSF 1 "register_operand" "w")
2038 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2039 )))]
2040 "TARGET_SIMD"
2041 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2042 [(set_attr "type" "neon_fp_cvt_widen_s")]
2043 )
2044
2045 ;; Convert between fixed-point and floating-point (vector modes)
2046
2047 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2048 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2049 (unspec:<VHSDF:FCVT_TARGET>
2050 [(match_operand:VHSDF 1 "register_operand" "w")
2051 (match_operand:SI 2 "immediate_operand" "i")]
2052 FCVT_F2FIXED))]
2053 "TARGET_SIMD"
2054 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2055 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2056 )
2057
2058 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2059 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2060 (unspec:<VDQ_HSDI:FCVT_TARGET>
2061 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2062 (match_operand:SI 2 "immediate_operand" "i")]
2063 FCVT_FIXED2F))]
2064 "TARGET_SIMD"
2065 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2066 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2067 )
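
;; These two patterns are reachable from the vcvt*_n_* intrinsics; a
;; minimal sketch (illustrative only) converting Q16.16 fixed-point to
;; float:
;;
;;   #include <arm_neon.h>
;;   float32x4_t from_q16 (int32x4_t x)
;;   {
;;     return vcvtq_n_f32_s32 (x, 16);   /* scvtf v0.4s, v0.4s, #16 */
;;   }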
2068
2069 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2070 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2071 ;; the meaning of HI and LO changes depending on the target endianness.
2072 ;; While elsewhere we map the higher numbered elements of a vector to
2073 ;; the lower architectural lanes of the vector, for these patterns we want
2074 ;; to always treat "hi" as referring to the higher architectural lanes.
2075 ;; Consequently, while the patterns below look inconsistent with our
2076 ;; other big-endian patterns, their behavior is as required.
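;;
;; A concrete reading (illustrative only): for a V8HF source
;;   h = [h0 .. h7]   (architectural lane numbers)
;; vec_unpacks_lo_v8hf converts lanes h0..h3 with fcvtl, while
;; vec_unpacks_hi_v8hf converts lanes h4..h7 with fcvtl2, regardless of
;; BYTES_BIG_ENDIAN.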
2077
2078 (define_expand "vec_unpacks_lo_<mode>"
2079 [(match_operand:<VWIDE> 0 "register_operand" "")
2080 (match_operand:VQ_HSF 1 "register_operand" "")]
2081 "TARGET_SIMD"
2082 {
2083 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2084 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2085 operands[1], p));
2086 DONE;
2087 }
2088 )
2089
2090 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2091 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2092 (float_extend:<VWIDE> (vec_select:<VHALF>
2093 (match_operand:VQ_HSF 1 "register_operand" "w")
2094 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2095 )))]
2096 "TARGET_SIMD"
2097 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2098 [(set_attr "type" "neon_fp_cvt_widen_s")]
2099 )
2100
2101 (define_expand "vec_unpacks_hi_<mode>"
2102 [(match_operand:<VWIDE> 0 "register_operand" "")
2103 (match_operand:VQ_HSF 1 "register_operand" "")]
2104 "TARGET_SIMD"
2105 {
2106 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2107 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2108 operands[1], p));
2109 DONE;
2110 }
2111 )
2112 (define_insn "aarch64_float_extend_lo_<Vwide>"
2113 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2114 (float_extend:<VWIDE>
2115 (match_operand:VDF 1 "register_operand" "w")))]
2116 "TARGET_SIMD"
2117 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2118 [(set_attr "type" "neon_fp_cvt_widen_s")]
2119 )
2120
2121 ;; Float narrowing operations.
2122
2123 (define_insn "aarch64_float_truncate_lo_<mode>"
2124 [(set (match_operand:VDF 0 "register_operand" "=w")
2125 (float_truncate:VDF
2126 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2127 "TARGET_SIMD"
2128 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2129 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2130 )
2131
2132 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2133 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2134 (vec_concat:<VDBL>
2135 (match_operand:VDF 1 "register_operand" "0")
2136 (float_truncate:VDF
2137 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2138 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2139 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2140 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2141 )
2142
2143 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2144 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2145 (vec_concat:<VDBL>
2146 (float_truncate:VDF
2147 (match_operand:<VWIDE> 2 "register_operand" "w"))
2148 (match_operand:VDF 1 "register_operand" "0")))]
2149 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2150 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2151 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2152 )
2153
2154 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2155 [(match_operand:<VDBL> 0 "register_operand" "=w")
2156 (match_operand:VDF 1 "register_operand" "0")
2157 (match_operand:<VWIDE> 2 "register_operand" "w")]
2158 "TARGET_SIMD"
2159 {
2160 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2161 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2162 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2163 emit_insn (gen (operands[0], operands[1], operands[2]));
2164 DONE;
2165 }
2166 )
2167
2168 (define_expand "vec_pack_trunc_v2df"
2169 [(set (match_operand:V4SF 0 "register_operand")
2170 (vec_concat:V4SF
2171 (float_truncate:V2SF
2172 (match_operand:V2DF 1 "register_operand"))
2173 (float_truncate:V2SF
2174 (match_operand:V2DF 2 "register_operand"))
2175 ))]
2176 "TARGET_SIMD"
2177 {
2178 rtx tmp = gen_reg_rtx (V2SFmode);
2179 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2180 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2181
2182 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2183 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2184 tmp, operands[hi]));
2185 DONE;
2186 }
2187 )
2188
2189 (define_expand "vec_pack_trunc_df"
2190 [(set (match_operand:V2SF 0 "register_operand")
2191 (vec_concat:V2SF
2192 (float_truncate:SF
2193 (match_operand:DF 1 "register_operand"))
2194 (float_truncate:SF
2195 (match_operand:DF 2 "register_operand"))
2196 ))]
2197 "TARGET_SIMD"
2198 {
2199 rtx tmp = gen_reg_rtx (V2DFmode);
2200 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2201 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2202
2203 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2204 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2205 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2206 DONE;
2207 }
2208 )
2209
2210 ;; FP Max/Min
2211 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2212 ;; expression like:
2213 ;; a = (b < c) ? b : c;
2214 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2215 ;; either explicitly or indirectly via -ffast-math.
2216 ;;
2217 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2218 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2219 ;; operand will be returned when both operands are zero (i.e. they may not
2220 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2221 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2222 ;; NaNs.
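;;
;; A minimal sketch of the idiom (illustrative only): with -ffast-math,
;; which implies -ffinite-math-only, the ternary below is matched as
;; MIN_EXPR and may vectorize to the fminnm form of the pattern that
;; follows.
;;
;;   void vmin (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = (a[i] < b[i]) ? a[i] : b[i];   /* fminnm v0.4s, v0.4s, v1.4s */
;;   }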
2223
2224 (define_insn "<su><maxmin><mode>3"
2225 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2226 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2227 (match_operand:VHSDF 2 "register_operand" "w")))]
2228 "TARGET_SIMD"
2229 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2230 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2231 )
2232
2233 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2234 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2235 ;; which implement the IEEE fmax ()/fmin () functions.
2236 (define_insn "<maxmin_uns><mode>3"
2237 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2238 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2239 (match_operand:VHSDF 2 "register_operand" "w")]
2240 FMAXMIN_UNS))]
2241 "TARGET_SIMD"
2242 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2243 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2244 )
2245
2246 ;; 'across lanes' add.
2247
2248 (define_expand "reduc_plus_scal_<mode>"
2249 [(match_operand:<VEL> 0 "register_operand" "=w")
2250 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2251 UNSPEC_ADDV)]
2252 "TARGET_SIMD"
2253 {
2254 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2255 rtx scratch = gen_reg_rtx (<MODE>mode);
2256 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2257 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2258 DONE;
2259 }
2260 )
2261
2262 (define_insn "aarch64_faddp<mode>"
2263 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2264 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2265 (match_operand:VHSDF 2 "register_operand" "w")]
2266 UNSPEC_FADDV))]
2267 "TARGET_SIMD"
2268 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2269 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2270 )
2271
2272 (define_insn "aarch64_reduc_plus_internal<mode>"
2273 [(set (match_operand:VDQV 0 "register_operand" "=w")
2274 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2275 UNSPEC_ADDV))]
2276 "TARGET_SIMD"
2277 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2278 [(set_attr "type" "neon_reduc_add<q>")]
2279 )
2280
2281 (define_insn "aarch64_reduc_plus_internalv2si"
2282 [(set (match_operand:V2SI 0 "register_operand" "=w")
2283 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2284 UNSPEC_ADDV))]
2285 "TARGET_SIMD"
2286 "addp\\t%0.2s, %1.2s, %1.2s"
2287 [(set_attr "type" "neon_reduc_add")]
2288 )
2289
2290 (define_insn "reduc_plus_scal_<mode>"
2291 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2292 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2293 UNSPEC_FADDV))]
2294 "TARGET_SIMD"
2295 "faddp\\t%<Vetype>0, %1.<Vtype>"
2296 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2297 )
2298
2299 (define_expand "reduc_plus_scal_v4sf"
2300 [(set (match_operand:SF 0 "register_operand")
2301 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2302 UNSPEC_FADDV))]
2303 "TARGET_SIMD"
2304 {
2305 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2306 rtx scratch = gen_reg_rtx (V4SFmode);
2307 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2308 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2309 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2310 DONE;
2311 })
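
;; Schematically, the V4SF expansion above reduces in two pairwise steps
;; rather than with a single across-lanes instruction (illustrative only):
;;   faddp v1.4s, v0.4s, v0.4s   -> {a0+a1, a2+a3, a0+a1, a2+a3}
;;   faddp v1.4s, v1.4s, v1.4s   -> every lane holds a0+a1+a2+a3
;; followed by an extract of lane 0.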
2312
2313 (define_insn "clrsb<mode>2"
2314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2315 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2316 "TARGET_SIMD"
2317 "cls\\t%0.<Vtype>, %1.<Vtype>"
2318 [(set_attr "type" "neon_cls<q>")]
2319 )
2320
2321 (define_insn "clz<mode>2"
2322 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2323 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2324 "TARGET_SIMD"
2325 "clz\\t%0.<Vtype>, %1.<Vtype>"
2326 [(set_attr "type" "neon_cls<q>")]
2327 )
2328
2329 (define_insn "popcount<mode>2"
2330 [(set (match_operand:VB 0 "register_operand" "=w")
2331 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2332 "TARGET_SIMD"
2333 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2334 [(set_attr "type" "neon_cnt<q>")]
2335 )
2336
2337 ;; 'across lanes' max and min ops.
2338
2339 ;; Template for outputting a scalar, so we can create __builtins which can be
2340 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2341 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2342 [(match_operand:<VEL> 0 "register_operand")
2343 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2344 FMAXMINV)]
2345 "TARGET_SIMD"
2346 {
2347 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2348 rtx scratch = gen_reg_rtx (<MODE>mode);
2349 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2350 operands[1]));
2351 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2352 DONE;
2353 }
2354 )
2355
2356 ;; Likewise for integer cases, signed and unsigned.
2357 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2358 [(match_operand:<VEL> 0 "register_operand")
2359 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2360 MAXMINV)]
2361 "TARGET_SIMD"
2362 {
2363 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2364 rtx scratch = gen_reg_rtx (<MODE>mode);
2365 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2366 operands[1]));
2367 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2368 DONE;
2369 }
2370 )
2371
2372 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2373 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2374 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2375 MAXMINV))]
2376 "TARGET_SIMD"
2377 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2378 [(set_attr "type" "neon_reduc_minmax<q>")]
2379 )
2380
2381 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2382 [(set (match_operand:V2SI 0 "register_operand" "=w")
2383 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2384 MAXMINV))]
2385 "TARGET_SIMD"
2386 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2387 [(set_attr "type" "neon_reduc_minmax")]
2388 )
2389
2390 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2391 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2392 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2393 FMAXMINV))]
2394 "TARGET_SIMD"
2395 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2396 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2397 )
2398
2399 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2400 ;; allocation.
2401 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2402 ;; to select.
2403 ;;
2404 ;; Thus our BSL is of the form:
2405 ;; op0 = bsl (mask, op2, op3)
2406 ;; We can use any of:
2407 ;;
2408 ;; if (op0 = mask)
2409 ;; bsl mask, op1, op2
2410 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2411 ;; bit op0, op2, mask
2412 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2413 ;; bif op0, op1, mask
2414 ;;
2415 ;; The aarch64_simd_bsl<mode> expander expands to this pattern.
2416 ;; Some forms of straight-line code may instead generate the equivalent
2417 ;; form matched by *aarch64_simd_bsl<mode>_alt.
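;;
;; A worked bit-level example (illustrative only): with
;;   mask = 0b11110000, op2 = 0bAAAAAAAA, op3 = 0bBBBBBBBB
;; the selection computes (mask & op2) | (~mask & op3) = 0bAAAABBBB:
;; 1-bits in the mask take bits from op2, 0-bits take bits from op3.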
2418
2419 (define_insn "aarch64_simd_bsl<mode>_internal"
2420 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2421 (xor:VDQ_I
2422 (and:VDQ_I
2423 (xor:VDQ_I
2424 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2425 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2426 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2427 (match_dup:<V_INT_EQUIV> 3)
2428 ))]
2429 "TARGET_SIMD"
2430 "@
2431 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2432 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2433 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2434 [(set_attr "type" "neon_bsl<q>")]
2435 )
2436
2437 ;; We need this form in addition to the above pattern to match the case
2438 ;; when combine tries merging three insns such that the second operand of
2439 ;; the outer XOR matches the second operand of the inner XOR rather than
2440 ;; the first. The two are equivalent but since recog doesn't try all
2441 ;; permutations of commutative operations, we have to have a separate pattern.
2442
2443 (define_insn "*aarch64_simd_bsl<mode>_alt"
2444 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2445 (xor:VDQ_I
2446 (and:VDQ_I
2447 (xor:VDQ_I
2448 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2449 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2450 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2451 (match_dup:<V_INT_EQUIV> 2)))]
2452 "TARGET_SIMD"
2453 "@
2454 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2455 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2456 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2457 [(set_attr "type" "neon_bsl<q>")]
2458 )
2459
2460 ;; DImode is special: we want to avoid computing operations in the
2461 ;; vector registers when they are more naturally computed in general
2462 ;; purpose registers, since doing so means moving all three operands from
2463 ;; general purpose registers to vector registers, then back again. However, we
2464 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2465 ;; optimizations based on the component operations of a BSL.
2466 ;;
2467 ;; That means we need a splitter back to the individual operations, if they
2468 ;; would be better calculated on the integer side.
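;;
;; The xor/and/xor form used by these patterns rests on the identity
;; (a minimal C sketch, illustrative only):
;;
;;   #include <stdint.h>
;;   uint64_t bsl (uint64_t mask, uint64_t a, uint64_t b)
;;   {
;;     return ((a ^ b) & mask) ^ b;   /* == (a & mask) | (b & ~mask) */
;;   }
;;
;; which is exactly the eor/and/eor sequence the splitters below emit on
;; the general-purpose side.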
2469
2470 (define_insn_and_split "aarch64_simd_bsldi_internal"
2471 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2472 (xor:DI
2473 (and:DI
2474 (xor:DI
2475 (match_operand:DI 3 "register_operand" "w,0,w,r")
2476 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2477 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2478 (match_dup:DI 3)
2479 ))]
2480 "TARGET_SIMD"
2481 "@
2482 bsl\\t%0.8b, %2.8b, %3.8b
2483 bit\\t%0.8b, %2.8b, %1.8b
2484 bif\\t%0.8b, %3.8b, %1.8b
2485 #"
2486 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2487 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2488 {
2489 /* Split back to individual operations. If we're before reload, and
2490 able to create a temporary register, do so. If we're after reload,
2491 we've got an early-clobber destination register, so use that.
2492 Otherwise, we can't create pseudos and we can't yet guarantee that
2493 operands[0] is safe to write, so FAIL to split. */
2494
2495 rtx scratch;
2496 if (reload_completed)
2497 scratch = operands[0];
2498 else if (can_create_pseudo_p ())
2499 scratch = gen_reg_rtx (DImode);
2500 else
2501 FAIL;
2502
2503 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2504 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2505 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2506 DONE;
2507 }
2508 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2509 (set_attr "length" "4,4,4,12")]
2510 )
2511
2512 (define_insn_and_split "aarch64_simd_bsldi_alt"
2513 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2514 (xor:DI
2515 (and:DI
2516 (xor:DI
2517 (match_operand:DI 3 "register_operand" "w,w,0,r")
2518 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2519 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2520 (match_dup:DI 2)
2521 ))]
2522 "TARGET_SIMD"
2523 "@
2524 bsl\\t%0.8b, %3.8b, %2.8b
2525 bit\\t%0.8b, %3.8b, %1.8b
2526 bif\\t%0.8b, %2.8b, %1.8b
2527 #"
2528 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2529 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2530 {
2531 /* Split back to individual operations. If we're before reload, and
2532 able to create a temporary register, do so. If we're after reload,
2533 we've got an early-clobber destination register, so use that.
2534 Otherwise, we can't create pseudos and we can't yet guarantee that
2535 operands[0] is safe to write, so FAIL to split. */
2536
2537 rtx scratch;
2538 if (reload_completed)
2539 scratch = operands[0];
2540 else if (can_create_pseudo_p ())
2541 scratch = gen_reg_rtx (DImode);
2542 else
2543 FAIL;
2544
2545 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2546 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2547 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2548 DONE;
2549 }
2550 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2551 (set_attr "length" "4,4,4,12")]
2552 )
2553
2554 (define_expand "aarch64_simd_bsl<mode>"
2555 [(match_operand:VALLDIF 0 "register_operand")
2556 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2557 (match_operand:VALLDIF 2 "register_operand")
2558 (match_operand:VALLDIF 3 "register_operand")]
2559 "TARGET_SIMD"
2560 {
2561 /* We can't alias operands together if they have different modes. */
2562 rtx tmp = operands[0];
2563 if (FLOAT_MODE_P (<MODE>mode))
2564 {
2565 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2566 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2567 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2568 }
2569 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2570 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2571 operands[1],
2572 operands[2],
2573 operands[3]));
2574 if (tmp != operands[0])
2575 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2576
2577 DONE;
2578 })
2579
2580 (define_expand "vcond_mask_<mode><v_int_equiv>"
2581 [(match_operand:VALLDI 0 "register_operand")
2582 (match_operand:VALLDI 1 "nonmemory_operand")
2583 (match_operand:VALLDI 2 "nonmemory_operand")
2584 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2585 "TARGET_SIMD"
2586 {
2587 /* If we have (a = (P) ? -1 : 0), then we can simply move the
2588 generated mask (the result must be int). */
2589 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2590 && operands[2] == CONST0_RTX (<MODE>mode))
2591 emit_move_insn (operands[0], operands[3]);
2592 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2593 else if (operands[1] == CONST0_RTX (<MODE>mode)
2594 && operands[2] == CONSTM1_RTX (<MODE>mode))
2595 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2596 else
2597 {
2598 if (!REG_P (operands[1]))
2599 operands[1] = force_reg (<MODE>mode, operands[1]);
2600 if (!REG_P (operands[2]))
2601 operands[2] = force_reg (<MODE>mode, operands[2]);
2602 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2603 operands[1], operands[2]));
2604 }
2605
2606 DONE;
2607 })
2608
2609 ;; Patterns comparing two vectors to produce a mask.
2610
2611 (define_expand "vec_cmp<mode><mode>"
2612 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2613 (match_operator 1 "comparison_operator"
2614 [(match_operand:VSDQ_I_DI 2 "register_operand")
2615 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2616 "TARGET_SIMD"
2617 {
2618 rtx mask = operands[0];
2619 enum rtx_code code = GET_CODE (operands[1]);
2620
2621 switch (code)
2622 {
2623 case NE:
2624 case LE:
2625 case LT:
2626 case GE:
2627 case GT:
2628 case EQ:
2629 if (operands[3] == CONST0_RTX (<MODE>mode))
2630 break;
2631
2632 /* Fall through. */
2633 default:
2634 if (!REG_P (operands[3]))
2635 operands[3] = force_reg (<MODE>mode, operands[3]);
2636
2637 break;
2638 }
2639
2640 switch (code)
2641 {
2642 case LT:
2643 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2644 break;
2645
2646 case GE:
2647 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2648 break;
2649
2650 case LE:
2651 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2652 break;
2653
2654 case GT:
2655 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2656 break;
2657
2658 case LTU:
2659 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2660 break;
2661
2662 case GEU:
2663 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2664 break;
2665
2666 case LEU:
2667 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2668 break;
2669
2670 case GTU:
2671 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2672 break;
2673
2674 case NE:
2675 /* Handle NE as !EQ. */
2676 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2677 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2678 break;
2679
2680 case EQ:
2681 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2682 break;
2683
2684 default:
2685 gcc_unreachable ();
2686 }
2687
2688 DONE;
2689 })
2690
2691 (define_expand "vec_cmp<mode><v_int_equiv>"
2692 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2693 (match_operator 1 "comparison_operator"
2694 [(match_operand:VDQF 2 "register_operand")
2695 (match_operand:VDQF 3 "nonmemory_operand")]))]
2696 "TARGET_SIMD"
2697 {
2698 int use_zero_form = 0;
2699 enum rtx_code code = GET_CODE (operands[1]);
2700 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2701
2702 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2703
2704 switch (code)
2705 {
2706 case LE:
2707 case LT:
2708 case GE:
2709 case GT:
2710 case EQ:
2711 if (operands[3] == CONST0_RTX (<MODE>mode))
2712 {
2713 use_zero_form = 1;
2714 break;
2715 }
2716 /* Fall through. */
2717 default:
2718 if (!REG_P (operands[3]))
2719 operands[3] = force_reg (<MODE>mode, operands[3]);
2720
2721 break;
2722 }
2723
2724 switch (code)
2725 {
2726 case LT:
2727 if (use_zero_form)
2728 {
2729 comparison = gen_aarch64_cmlt<mode>;
2730 break;
2731 }
2732 /* Fall through. */
2733 case UNLT:
2734 std::swap (operands[2], operands[3]);
2735 /* Fall through. */
2736 case UNGT:
2737 case GT:
2738 comparison = gen_aarch64_cmgt<mode>;
2739 break;
2740 case LE:
2741 if (use_zero_form)
2742 {
2743 comparison = gen_aarch64_cmle<mode>;
2744 break;
2745 }
2746 /* Fall through. */
2747 case UNLE:
2748 std::swap (operands[2], operands[3]);
2749 /* Fall through. */
2750 case UNGE:
2751 case GE:
2752 comparison = gen_aarch64_cmge<mode>;
2753 break;
2754 case NE:
2755 case EQ:
2756 comparison = gen_aarch64_cmeq<mode>;
2757 break;
2758 case UNEQ:
2759 case ORDERED:
2760 case UNORDERED:
2761 case LTGT:
2762 break;
2763 default:
2764 gcc_unreachable ();
2765 }
2766
2767 switch (code)
2768 {
2769 case UNGE:
2770 case UNGT:
2771 case UNLE:
2772 case UNLT:
2773 {
2774 /* All of the above must not raise any FP exceptions. Thus we first
2775 check each operand for NaNs and force any elements containing NaN to
2776 zero before using them in the compare.
2777 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2778 (cm<cc> (isnan (a) ? 0.0 : a,
2779 isnan (b) ? 0.0 : b))
2780 We use the following transformations for doing the comparisons:
2781 a UNGE b -> a GE b
2782 a UNGT b -> a GT b
2783 a UNLE b -> b GE a
2784 a UNLT b -> b GT a. */
2785
2786 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2787 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2788 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2789 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2790 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2791 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2792 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2793 lowpart_subreg (<V_INT_EQUIV>mode,
2794 operands[2],
2795 <MODE>mode)));
2796 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2797 lowpart_subreg (<V_INT_EQUIV>mode,
2798 operands[3],
2799 <MODE>mode)));
2800 gcc_assert (comparison != NULL);
2801 emit_insn (comparison (operands[0],
2802 lowpart_subreg (<MODE>mode,
2803 tmp0, <V_INT_EQUIV>mode),
2804 lowpart_subreg (<MODE>mode,
2805 tmp1, <V_INT_EQUIV>mode)));
2806 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2807 }
2808 break;
2809
2810 case LT:
2811 case LE:
2812 case GT:
2813 case GE:
2814 case EQ:
2815 case NE:
2816 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2817 Since a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
2818 a GE b -> a GE b
2819 a GT b -> a GT b
2820 a LE b -> b GE a
2821 a LT b -> b GT a
2822 a EQ b -> a EQ b
2823 a NE b -> ~(a EQ b) */
2824 gcc_assert (comparison != NULL);
2825 emit_insn (comparison (operands[0], operands[2], operands[3]));
2826 if (code == NE)
2827 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2828 break;
2829
2830 case LTGT:
2831 /* LTGT is not guaranteed not to generate an FP exception, so
2832 take the faster route: ((a > b) || (b > a)). */
2833 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2834 operands[2], operands[3]));
2835 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2836 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2837 break;
2838
2839 case ORDERED:
2840 case UNORDERED:
2841 case UNEQ:
2842 /* cmeq (a, a) & cmeq (b, b). */
2843 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2844 operands[2], operands[2]));
2845 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2846 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2847
2848 if (code == UNORDERED)
2849 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2850 else if (code == UNEQ)
2851 {
2852 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2853 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2854 }
2855 break;
2856
2857 default:
2858 gcc_unreachable ();
2859 }
2860
2861 DONE;
2862 })
2863
2864 (define_expand "vec_cmpu<mode><mode>"
2865 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2866 (match_operator 1 "comparison_operator"
2867 [(match_operand:VSDQ_I_DI 2 "register_operand")
2868 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2869 "TARGET_SIMD"
2870 {
2871 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2872 operands[2], operands[3]));
2873 DONE;
2874 })
2875
2876 (define_expand "vcond<mode><mode>"
2877 [(set (match_operand:VALLDI 0 "register_operand")
2878 (if_then_else:VALLDI
2879 (match_operator 3 "comparison_operator"
2880 [(match_operand:VALLDI 4 "register_operand")
2881 (match_operand:VALLDI 5 "nonmemory_operand")])
2882 (match_operand:VALLDI 1 "nonmemory_operand")
2883 (match_operand:VALLDI 2 "nonmemory_operand")))]
2884 "TARGET_SIMD"
2885 {
2886 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2887 enum rtx_code code = GET_CODE (operands[3]);
2888
2889 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2890 invert it and swap operands 1/2 to avoid the additional
2891 NOT instruction. */
2892 if (code == NE)
2893 {
2894 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2895 operands[4], operands[5]);
2896 std::swap (operands[1], operands[2]);
2897 }
2898 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2899 operands[4], operands[5]));
2900 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2901 operands[2], mask));
2902
2903 DONE;
2904 })
2905
2906 (define_expand "vcond<v_cmp_mixed><mode>"
2907 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2908 (if_then_else:<V_cmp_mixed>
2909 (match_operator 3 "comparison_operator"
2910 [(match_operand:VDQF_COND 4 "register_operand")
2911 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2912 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2913 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2914 "TARGET_SIMD"
2915 {
2916 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2917 enum rtx_code code = GET_CODE (operands[3]);
2918
2919 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2920 invert it and swap operands 1/2 to avoid the additional
2921 NOT instruction. */
2922 if (code == NE)
2923 {
2924 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2925 operands[4], operands[5]);
2926 std::swap (operands[1], operands[2]);
2927 }
2928 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2929 operands[4], operands[5]));
2930 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2931 operands[0], operands[1],
2932 operands[2], mask));
2933
2934 DONE;
2935 })
2936
2937 (define_expand "vcondu<mode><mode>"
2938 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2939 (if_then_else:VSDQ_I_DI
2940 (match_operator 3 "comparison_operator"
2941 [(match_operand:VSDQ_I_DI 4 "register_operand")
2942 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2943 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2944 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2945 "TARGET_SIMD"
2946 {
2947 rtx mask = gen_reg_rtx (<MODE>mode);
2948 enum rtx_code code = GET_CODE (operands[3]);
2949
2950 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2951 invert it and swap operands 1/2 to avoid the additional
2952 NOT instruction. */
2953 if (code == NE)
2954 {
2955 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2956 operands[4], operands[5]);
2957 std::swap (operands[1], operands[2]);
2958 }
2959 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2960 operands[4], operands[5]));
2961 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2962 operands[2], mask));
2963 DONE;
2964 })
2965
2966 (define_expand "vcondu<mode><v_cmp_mixed>"
2967 [(set (match_operand:VDQF 0 "register_operand")
2968 (if_then_else:VDQF
2969 (match_operator 3 "comparison_operator"
2970 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2971 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2972 (match_operand:VDQF 1 "nonmemory_operand")
2973 (match_operand:VDQF 2 "nonmemory_operand")))]
2974 "TARGET_SIMD"
2975 {
2976 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2977 enum rtx_code code = GET_CODE (operands[3]);
2978
2979 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2980 invert it and swap operands 1/2 to avoid the additional
2981 NOT instruction. */
2982 if (code == NE)
2983 {
2984 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2985 operands[4], operands[5]);
2986 std::swap (operands[1], operands[2]);
2987 }
2988 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2989 mask, operands[3],
2990 operands[4], operands[5]));
2991 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2992 operands[2], mask));
2993 DONE;
2994 })
2995
2996 ;; Patterns for AArch64 SIMD Intrinsics.
2997
2998 ;; Lane extraction with sign extension to general purpose register.
2999 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3000 [(set (match_operand:GPI 0 "register_operand" "=r")
3001 (sign_extend:GPI
3002 (vec_select:<VEL>
3003 (match_operand:VDQQH 1 "register_operand" "w")
3004 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3005 "TARGET_SIMD"
3006 {
3007 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
3008 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3009 }
3010 [(set_attr "type" "neon_to_gp<q>")]
3011 )
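
;; A hedged sketch of source that reaches the pattern above: returning a
;; narrower lane as a wider signed integer lets combine fold the lane
;; extract and the sign extension into a single smov.
;;
;;   #include <arm_neon.h>
;;   int32_t lane2 (int16x8_t v)
;;   {
;;     return vgetq_lane_s16 (v, 2);   /* may become: smov w0, v0.h[2] */
;;   }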
3012
3013 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3014 [(set (match_operand:SI 0 "register_operand" "=r")
3015 (zero_extend:SI
3016 (vec_select:<VEL>
3017 (match_operand:VDQQH 1 "register_operand" "w")
3018 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3019 "TARGET_SIMD"
3020 {
3021 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3022 return "umov\\t%w0, %1.<Vetype>[%2]";
3023 }
3024 [(set_attr "type" "neon_to_gp<q>")]
3025 )
3026
3027 ;; Lane extraction of a value: neither sign nor zero extension
3028 ;; is guaranteed, so the upper bits should be considered undefined.
3029 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3030 (define_insn "aarch64_get_lane<mode>"
3031 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3032 (vec_select:<VEL>
3033 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3034 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3035 "TARGET_SIMD"
3036 {
3037 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3038 switch (which_alternative)
3039 {
3040 case 0:
3041 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3042 case 1:
3043 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3044 case 2:
3045 return "st1\\t{%1.<Vetype>}[%2], %0";
3046 default:
3047 gcc_unreachable ();
3048 }
3049 }
3050 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3051 )
3052
3053 (define_insn "load_pair_lanes<mode>"
3054 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3055 (vec_concat:<VDBL>
3056 (match_operand:VDC 1 "memory_operand" "Utq")
3057 (match_operand:VDC 2 "memory_operand" "m")))]
3058 "TARGET_SIMD && !STRICT_ALIGNMENT
3059 && rtx_equal_p (XEXP (operands[2], 0),
3060 plus_constant (Pmode,
3061 XEXP (operands[1], 0),
3062 GET_MODE_SIZE (<MODE>mode)))"
3063 "ldr\\t%q0, %1"
3064 [(set_attr "type" "neon_load1_1reg_q")]
3065 )
3066
3067 (define_insn "store_pair_lanes<mode>"
3068 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3069 (vec_concat:<VDBL>
3070 (match_operand:VDC 1 "register_operand" "w, r")
3071 (match_operand:VDC 2 "register_operand" "w, r")))]
3072 "TARGET_SIMD"
3073 "@
3074 stp\\t%d1, %d2, %y0
3075 stp\\t%x1, %x2, %y0"
3076 [(set_attr "type" "neon_stp, store_16")]
3077 )
3078
3079 ;; In this insn, operand 1 is the low half and operand 2 the high half
3080 ;; of the destination vector.
3081
3082 (define_insn "*aarch64_combinez<mode>"
3083 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3084 (vec_concat:<VDBL>
3085 (match_operand:VDC 1 "general_operand" "w,?r,m")
3086 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3087 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3088 "@
3089 mov\\t%0.8b, %1.8b
3090 fmov\t%d0, %1
3091 ldr\\t%d0, %1"
3092 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3093 (set_attr "simd" "yes,*,yes")
3094 (set_attr "fp" "*,yes,*")]
3095 )
3096
3097 (define_insn "*aarch64_combinez_be<mode>"
3098 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3099 (vec_concat:<VDBL>
3100 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3101 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3102 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3103 "@
3104 mov\\t%0.8b, %1.8b
3105 fmov\t%d0, %1
3106 ldr\\t%d0, %1"
3107 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3108 (set_attr "simd" "yes,*,yes")
3109 (set_attr "fp" "*,yes,*")]
3110 )
3111
3112 (define_expand "aarch64_combine<mode>"
3113 [(match_operand:<VDBL> 0 "register_operand")
3114 (match_operand:VDC 1 "register_operand")
3115 (match_operand:VDC 2 "register_operand")]
3116 "TARGET_SIMD"
3117 {
3118 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3119
3120 DONE;
3121 }
3122 )
3123
3124 (define_expand "aarch64_simd_combine<mode>"
3125 [(match_operand:<VDBL> 0 "register_operand")
3126 (match_operand:VDC 1 "register_operand")
3127 (match_operand:VDC 2 "register_operand")]
3128 "TARGET_SIMD"
3129 {
3130 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3131 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3132 DONE;
3133 }
3134 [(set_attr "type" "multiple")]
3135 )
3136
3137 ;; <su><addsub>l<q>.
3138
3139 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3140 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3141 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3142 (match_operand:VQW 1 "register_operand" "w")
3143 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3144 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3145 (match_operand:VQW 2 "register_operand" "w")
3146 (match_dup 3)))))]
3147 "TARGET_SIMD"
3148 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3149 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3150 )
3151
3152 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3153 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3155 (match_operand:VQW 1 "register_operand" "w")
3156 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3157 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3158 (match_operand:VQW 2 "register_operand" "w")
3159 (match_dup 3)))))]
3160 "TARGET_SIMD"
3161 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3162 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3163 )
3164
3165
3166 (define_expand "aarch64_saddl2<mode>"
3167 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3168 (match_operand:VQW 1 "register_operand" "w")
3169 (match_operand:VQW 2 "register_operand" "w")]
3170 "TARGET_SIMD"
3171 {
3172 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3173 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3174 operands[2], p));
3175 DONE;
3176 })
3177
3178 (define_expand "aarch64_uaddl2<mode>"
3179 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3180 (match_operand:VQW 1 "register_operand" "w")
3181 (match_operand:VQW 2 "register_operand" "w")]
3182 "TARGET_SIMD"
3183 {
3184 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3185 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3186 operands[2], p));
3187 DONE;
3188 })
3189
3190 (define_expand "aarch64_ssubl2<mode>"
3191 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3192 (match_operand:VQW 1 "register_operand" "w")
3193 (match_operand:VQW 2 "register_operand" "w")]
3194 "TARGET_SIMD"
3195 {
3196 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3197 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3198 operands[2], p));
3199 DONE;
3200 })
3201
3202 (define_expand "aarch64_usubl2<mode>"
3203 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3204 (match_operand:VQW 1 "register_operand" "w")
3205 (match_operand:VQW 2 "register_operand" "w")]
3206 "TARGET_SIMD"
3207 {
3208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3209 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3210 operands[2], p));
3211 DONE;
3212 })
3213
3214 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3215 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3216 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3217 (match_operand:VD_BHSI 1 "register_operand" "w"))
3218 (ANY_EXTEND:<VWIDE>
3219 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3220 "TARGET_SIMD"
3221 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3222 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3223 )
3224
3225 ;; <su><addsub>w<q>.
3226
3227 (define_expand "widen_ssum<mode>3"
3228 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3229 (plus:<VDBLW> (sign_extend:<VDBLW>
3230 (match_operand:VQW 1 "register_operand" ""))
3231 (match_operand:<VDBLW> 2 "register_operand" "")))]
3232 "TARGET_SIMD"
3233 {
3234 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3235 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3236
3237 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3238 operands[1], p));
3239 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3240 DONE;
3241 }
3242 )
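
;; Schematically, for a V8HI input accumulated into a V4SI result, the
;; expansion above becomes (register names illustrative only):
;;   saddw  vT.4s, vACC.4s, vIN.4h    // acc + sign_extend (low half)
;;   saddw2 v0.4s, vT.4s, vIN.8h      // plus sign_extend (high half)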
3243
3244 (define_expand "widen_ssum<mode>3"
3245 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3246 (plus:<VWIDE> (sign_extend:<VWIDE>
3247 (match_operand:VD_BHSI 1 "register_operand" ""))
3248 (match_operand:<VWIDE> 2 "register_operand" "")))]
3249 "TARGET_SIMD"
3250 {
3251 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3252 DONE;
3253 })
3254
3255 (define_expand "widen_usum<mode>3"
3256 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3257 (plus:<VDBLW> (zero_extend:<VDBLW>
3258 (match_operand:VQW 1 "register_operand" ""))
3259 (match_operand:<VDBLW> 2 "register_operand" "")))]
3260 "TARGET_SIMD"
3261 {
3262 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3263 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3264
3265 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3266 operands[1], p));
3267 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3268 DONE;
3269 }
3270 )
3271
3272 (define_expand "widen_usum<mode>3"
3273 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3274 (plus:<VWIDE> (zero_extend:<VWIDE>
3275 (match_operand:VD_BHSI 1 "register_operand" ""))
3276 (match_operand:<VWIDE> 2 "register_operand" "")))]
3277 "TARGET_SIMD"
3278 {
3279 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3280 DONE;
3281 })
3282
3283 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3284 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3285 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3286 (ANY_EXTEND:<VWIDE>
3287 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3288 "TARGET_SIMD"
3289 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3290 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3291 )
3292
3293 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3294 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3295 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3296 (ANY_EXTEND:<VWIDE>
3297 (vec_select:<VHALF>
3298 (match_operand:VQW 2 "register_operand" "w")
3299 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3300 "TARGET_SIMD"
3301 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3302 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3303 )
3304
3305 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3307 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3308 (ANY_EXTEND:<VWIDE>
3309 (vec_select:<VHALF>
3310 (match_operand:VQW 2 "register_operand" "w")
3311 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3312 "TARGET_SIMD"
3313 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3314 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3315 )
3316
3317 (define_expand "aarch64_saddw2<mode>"
3318 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3319 (match_operand:<VWIDE> 1 "register_operand" "w")
3320 (match_operand:VQW 2 "register_operand" "w")]
3321 "TARGET_SIMD"
3322 {
3323 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3324 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3325 operands[2], p));
3326 DONE;
3327 })
3328
3329 (define_expand "aarch64_uaddw2<mode>"
3330 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3331 (match_operand:<VWIDE> 1 "register_operand" "w")
3332 (match_operand:VQW 2 "register_operand" "w")]
3333 "TARGET_SIMD"
3334 {
3335 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3336 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3337 operands[2], p));
3338 DONE;
3339 })
3340
3342 (define_expand "aarch64_ssubw2<mode>"
3343 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3344 (match_operand:<VWIDE> 1 "register_operand" "w")
3345 (match_operand:VQW 2 "register_operand" "w")]
3346 "TARGET_SIMD"
3347 {
3348 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3349 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3350 operands[2], p));
3351 DONE;
3352 })
3353
3354 (define_expand "aarch64_usubw2<mode>"
3355 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3356 (match_operand:<VWIDE> 1 "register_operand" "w")
3357 (match_operand:VQW 2 "register_operand" "w")]
3358 "TARGET_SIMD"
3359 {
3360 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3361 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3362 operands[2], p));
3363 DONE;
3364 })
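
;; As a sketch of what the expanders above construct: for V8HImode,
;; aarch64_simd_vect_par_cnst_half (V8HImode, 8, true) should yield
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;; so the vec_select in the *_internal patterns picks out the high half.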
3365
3366 ;; <su><r>h<addsub>.
3367
3368 (define_insn "aarch64_<sur>h<addsub><mode>"
3369 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3370 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3371 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3372 HADDSUB))]
3373 "TARGET_SIMD"
3374 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3375 [(set_attr "type" "neon_<addsub>_halve<q>")]
3376 )
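
;; Per-lane semantics, for illustration: shadd computes (a + b) >> 1
;; with the addition done at double width, so it cannot overflow, and
;; the rounding forms add one first: srhadd is (a + b + 1) >> 1.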
3377
3378 ;; <r><addsub>hn<q>.
3379
3380 (define_insn "aarch64_<sur><addsub>hn<mode>"
3381 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3382 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3383 (match_operand:VQN 2 "register_operand" "w")]
3384 ADDSUBHN))]
3385 "TARGET_SIMD"
3386 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3387 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3388 )
3389
3390 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3391 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3392 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3393 (match_operand:VQN 2 "register_operand" "w")
3394 (match_operand:VQN 3 "register_operand" "w")]
3395 ADDSUBHN2))]
3396 "TARGET_SIMD"
3397 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3398 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3399 )
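
;; For 2n-bit input lanes, addhn keeps the high n bits of the sum,
;; (a + b) >> n, while raddhn rounds first:
;; (a + b + (1 << (n - 1))) >> n.  The subhn/rsubhn forms do the same
;; for a - b.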
3400
3401 ;; pmul.
3402
3403 (define_insn "aarch64_pmul<mode>"
3404 [(set (match_operand:VB 0 "register_operand" "=w")
3405 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3406 (match_operand:VB 2 "register_operand" "w")]
3407 UNSPEC_PMUL))]
3408 "TARGET_SIMD"
3409 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3410 [(set_attr "type" "neon_mul_<Vetype><q>")]
3411 )
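
;; pmul is a polynomial (carry-less) multiply over GF(2): partial
;; products are combined with XOR rather than addition, so for example
;; pmul (0x03, 0x03) gives 0x05 where an ordinary mul would give 0x09.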
3412
3413 ;; fmulx.
3414
3415 (define_insn "aarch64_fmulx<mode>"
3416 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3417 (unspec:VHSDF_HSDF
3418 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3419 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3420 UNSPEC_FMULX))]
3421 "TARGET_SIMD"
3422 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3423 [(set_attr "type" "neon_fp_mul_<stype>")]
3424 )
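
;; fmulx differs from fmul only in that (+/-)0.0 * (+/-)infinity
;; returns (+/-)2.0 instead of NaN, which is why it is represented as
;; an unspec rather than as a plain mult.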
3425
3426 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3427
3428 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3429 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3430 (unspec:VDQSF
3431 [(match_operand:VDQSF 1 "register_operand" "w")
3432 (vec_duplicate:VDQSF
3433 (vec_select:<VEL>
3434 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3435 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3436 UNSPEC_FMULX))]
3437 "TARGET_SIMD"
3438 {
3439 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3440 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3441 }
3442 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3443 )
3444
3445 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3446
3447 (define_insn "*aarch64_mulx_elt<mode>"
3448 [(set (match_operand:VDQF 0 "register_operand" "=w")
3449 (unspec:VDQF
3450 [(match_operand:VDQF 1 "register_operand" "w")
3451 (vec_duplicate:VDQF
3452 (vec_select:<VEL>
3453 (match_operand:VDQF 2 "register_operand" "w")
3454 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3455 UNSPEC_FMULX))]
3456 "TARGET_SIMD"
3457 {
3458 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3459 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3460 }
3461 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3462 )
3463
3464 ;; vmulxq_lane
3465
3466 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3467 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3468 (unspec:VHSDF
3469 [(match_operand:VHSDF 1 "register_operand" "w")
3470 (vec_duplicate:VHSDF
3471 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3472 UNSPEC_FMULX))]
3473 "TARGET_SIMD"
3474 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3475 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3476 )
3477
3478 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3479 ;; vmulxd_lane_f64 == vmulx_lane_f64
3480 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3481
3482 (define_insn "*aarch64_vgetfmulx<mode>"
3483 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3484 (unspec:<VEL>
3485 [(match_operand:<VEL> 1 "register_operand" "w")
3486 (vec_select:<VEL>
3487 (match_operand:VDQF 2 "register_operand" "w")
3488 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3489 UNSPEC_FMULX))]
3490 "TARGET_SIMD"
3491 {
3492 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3493 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3494 }
3495 [(set_attr "type" "fmul<Vetype>")]
3496 )
3497 ;; <su>q<addsub>
3498
3499 (define_insn "aarch64_<su_optab><optab><mode>"
3500 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3501 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3502 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3503 "TARGET_SIMD"
3504 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3505 [(set_attr "type" "neon_<optab><q>")]
3506 )
3507
3508 ;; suqadd and usqadd
3509
3510 (define_insn "aarch64_<sur>qadd<mode>"
3511 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3512 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3513 (match_operand:VSDQ_I 2 "register_operand" "w")]
3514 USSUQADD))]
3515 "TARGET_SIMD"
3516 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3517 [(set_attr "type" "neon_qadd<q>")]
3518 )
3519
3520 ;; sqmovun
3521
3522 (define_insn "aarch64_sqmovun<mode>"
3523 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3524 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3525 UNSPEC_SQXTUN))]
3526 "TARGET_SIMD"
3527 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3528 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3529 )
3530
3531 ;; sqmovn and uqmovn
3532
3533 (define_insn "aarch64_<sur>qmovn<mode>"
3534 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3535 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3536 SUQMOVN))]
3537 "TARGET_SIMD"
3538 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3539 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3540 )
3541
3542 ;; <su>q<absneg>
3543
3544 (define_insn "aarch64_s<optab><mode>"
3545 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3546 (UNQOPS:VSDQ_I
3547 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3548 "TARGET_SIMD"
3549 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3550 [(set_attr "type" "neon_<optab><q>")]
3551 )
3552
3553 ;; sq<r>dmulh.
3554
3555 (define_insn "aarch64_sq<r>dmulh<mode>"
3556 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3557 (unspec:VSDQ_HSI
3558 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3559 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3560 VQDMULH))]
3561 "TARGET_SIMD"
3562 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3563 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3564 )
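
;; Per-lane semantics with n-bit elements, for illustration:
;;   sqdmulh:  sat ((2 * a * b) >> n)
;;   sqrdmulh: sat ((2 * a * b + (1 << (n - 1))) >> n)
;; hence the "doubling multiply, returning high half" naming.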
3565
3566 ;; sq<r>dmulh_lane
3567
3568 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3569 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3570 (unspec:VDQHS
3571 [(match_operand:VDQHS 1 "register_operand" "w")
3572 (vec_select:<VEL>
3573 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3574 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3575 VQDMULH))]
3576 "TARGET_SIMD"
3577 "*
3578 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3579 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3580 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3581 )
3582
3583 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3584 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3585 (unspec:VDQHS
3586 [(match_operand:VDQHS 1 "register_operand" "w")
3587 (vec_select:<VEL>
3588 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3589 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3590 VQDMULH))]
3591 "TARGET_SIMD"
3592 "*
3593 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3594 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3595 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3596 )
3597
3598 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3599 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3600 (unspec:SD_HSI
3601 [(match_operand:SD_HSI 1 "register_operand" "w")
3602 (vec_select:<VEL>
3603 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3604 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3605 VQDMULH))]
3606 "TARGET_SIMD"
3607 "*
3608 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3609 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3610 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3611 )
3612
3613 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3614 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3615 (unspec:SD_HSI
3616 [(match_operand:SD_HSI 1 "register_operand" "w")
3617 (vec_select:<VEL>
3618 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3619 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3620 VQDMULH))]
3621 "TARGET_SIMD"
3622 "*
3623 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3624 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3625 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3626 )
3627
3628 ;; sqrdml[as]h.
3629
3630 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3631 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3632 (unspec:VSDQ_HSI
3633 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3634 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3635 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3636 SQRDMLH_AS))]
3637 "TARGET_SIMD_RDMA"
3638 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3639 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3640 )
3641
3642 ;; sqrdml[as]h_lane.
3643
3644 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3645 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3646 (unspec:VDQHS
3647 [(match_operand:VDQHS 1 "register_operand" "0")
3648 (match_operand:VDQHS 2 "register_operand" "w")
3649 (vec_select:<VEL>
3650 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3651 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3652 SQRDMLH_AS))]
3653 "TARGET_SIMD_RDMA"
3654 {
3655 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3656 return
3657 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3658 }
3659 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3660 )
3661
3662 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3663 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3664 (unspec:SD_HSI
3665 [(match_operand:SD_HSI 1 "register_operand" "0")
3666 (match_operand:SD_HSI 2 "register_operand" "w")
3667 (vec_select:<VEL>
3668 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3669 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3670 SQRDMLH_AS))]
3671 "TARGET_SIMD_RDMA"
3672 {
3673 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3674 return
3675 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3676 }
3677 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3678 )
3679
3680 ;; sqrdml[as]h_laneq.
3681
3682 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3683 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3684 (unspec:VDQHS
3685 [(match_operand:VDQHS 1 "register_operand" "0")
3686 (match_operand:VDQHS 2 "register_operand" "w")
3687 (vec_select:<VEL>
3688 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3689 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3690 SQRDMLH_AS))]
3691 "TARGET_SIMD_RDMA"
3692 {
3693 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3694 return
3695 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3696 }
3697 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3698 )
3699
3700 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3701 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3702 (unspec:SD_HSI
3703 [(match_operand:SD_HSI 1 "register_operand" "0")
3704 (match_operand:SD_HSI 2 "register_operand" "w")
3705 (vec_select:<VEL>
3706 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3707 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3708 SQRDMLH_AS))]
3709 "TARGET_SIMD_RDMA"
3710 {
3711 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3712 return
3713 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3714 }
3715 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3716 )
3717
3718 ;; vqdml[sa]l
3719
3720 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3721 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3722 (SBINQOPS:<VWIDE>
3723 (match_operand:<VWIDE> 1 "register_operand" "0")
3724 (ss_ashift:<VWIDE>
3725 (mult:<VWIDE>
3726 (sign_extend:<VWIDE>
3727 (match_operand:VSD_HSI 2 "register_operand" "w"))
3728 (sign_extend:<VWIDE>
3729 (match_operand:VSD_HSI 3 "register_operand" "w")))
3730 (const_int 1))))]
3731 "TARGET_SIMD"
3732 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3733 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3734 )
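
;; Note how the doubling is modelled above: the widened product is
;; shifted left by one with a saturating ss_ashift and then accumulated
;; with a saturating ss_plus/ss_minus, so for 16-bit inputs a lane is
;; roughly sat (acc +/- sat (2 * (int32) a * (int32) b)).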
3735
3736 ;; vqdml[sa]l_lane
3737
3738 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3739 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3740 (SBINQOPS:<VWIDE>
3741 (match_operand:<VWIDE> 1 "register_operand" "0")
3742 (ss_ashift:<VWIDE>
3743 (mult:<VWIDE>
3744 (sign_extend:<VWIDE>
3745 (match_operand:VD_HSI 2 "register_operand" "w"))
3746 (sign_extend:<VWIDE>
3747 (vec_duplicate:VD_HSI
3748 (vec_select:<VEL>
3749 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3750 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3751 ))
3752 (const_int 1))))]
3753 "TARGET_SIMD"
3754 {
3755 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3756 return
3757 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3758 }
3759 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3760 )
3761
3762 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3763 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3764 (SBINQOPS:<VWIDE>
3765 (match_operand:<VWIDE> 1 "register_operand" "0")
3766 (ss_ashift:<VWIDE>
3767 (mult:<VWIDE>
3768 (sign_extend:<VWIDE>
3769 (match_operand:VD_HSI 2 "register_operand" "w"))
3770 (sign_extend:<VWIDE>
3771 (vec_duplicate:VD_HSI
3772 (vec_select:<VEL>
3773 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3774 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3775 ))
3776 (const_int 1))))]
3777 "TARGET_SIMD"
3778 {
3779 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3780 return
3781 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3782 }
3783 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3784 )
3785
3786 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3787 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3788 (SBINQOPS:<VWIDE>
3789 (match_operand:<VWIDE> 1 "register_operand" "0")
3790 (ss_ashift:<VWIDE>
3791 (mult:<VWIDE>
3792 (sign_extend:<VWIDE>
3793 (match_operand:SD_HSI 2 "register_operand" "w"))
3794 (sign_extend:<VWIDE>
3795 (vec_select:<VEL>
3796 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3797 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3798 )
3799 (const_int 1))))]
3800 "TARGET_SIMD"
3801 {
3802 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3803 return
3804 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3805 }
3806 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3807 )
3808
3809 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3810 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3811 (SBINQOPS:<VWIDE>
3812 (match_operand:<VWIDE> 1 "register_operand" "0")
3813 (ss_ashift:<VWIDE>
3814 (mult:<VWIDE>
3815 (sign_extend:<VWIDE>
3816 (match_operand:SD_HSI 2 "register_operand" "w"))
3817 (sign_extend:<VWIDE>
3818 (vec_select:<VEL>
3819 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3820 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3821 )
3822 (const_int 1))))]
3823 "TARGET_SIMD"
3824 {
3825 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3826 return
3827 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3828 }
3829 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3830 )
3831
3832 ;; vqdml[sa]l_n
3833
3834 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3835 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3836 (SBINQOPS:<VWIDE>
3837 (match_operand:<VWIDE> 1 "register_operand" "0")
3838 (ss_ashift:<VWIDE>
3839 (mult:<VWIDE>
3840 (sign_extend:<VWIDE>
3841 (match_operand:VD_HSI 2 "register_operand" "w"))
3842 (sign_extend:<VWIDE>
3843 (vec_duplicate:VD_HSI
3844 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3845 (const_int 1))))]
3846 "TARGET_SIMD"
3847 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3848 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3849 )
3850
3851 ;; sqdml[as]l2
3852
3853 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3854 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3855 (SBINQOPS:<VWIDE>
3856 (match_operand:<VWIDE> 1 "register_operand" "0")
3857 (ss_ashift:<VWIDE>
3858 (mult:<VWIDE>
3859 (sign_extend:<VWIDE>
3860 (vec_select:<VHALF>
3861 (match_operand:VQ_HSI 2 "register_operand" "w")
3862 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3863 (sign_extend:<VWIDE>
3864 (vec_select:<VHALF>
3865 (match_operand:VQ_HSI 3 "register_operand" "w")
3866 (match_dup 4))))
3867 (const_int 1))))]
3868 "TARGET_SIMD"
3869 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3870 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3871 )
3872
3873 (define_expand "aarch64_sqdmlal2<mode>"
3874 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3875 (match_operand:<VWIDE> 1 "register_operand" "w")
3876 (match_operand:VQ_HSI 2 "register_operand" "w")
3877 (match_operand:VQ_HSI 3 "register_operand" "w")]
3878 "TARGET_SIMD"
3879 {
3880 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3881 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3882 operands[2], operands[3], p));
3883 DONE;
3884 })
3885
3886 (define_expand "aarch64_sqdmlsl2<mode>"
3887 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3888 (match_operand:<VWIDE> 1 "register_operand" "w")
3889 (match_operand:VQ_HSI 2 "register_operand" "w")
3890 (match_operand:VQ_HSI 3 "register_operand" "w")]
3891 "TARGET_SIMD"
3892 {
3893 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3894 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3895 operands[2], operands[3], p));
3896 DONE;
3897 })
3898
3899 ;; vqdml[sa]l2_lane
3900
3901 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3902 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3903 (SBINQOPS:<VWIDE>
3904 (match_operand:<VWIDE> 1 "register_operand" "0")
3905 (ss_ashift:<VWIDE>
3906 (mult:<VWIDE>
3907 (sign_extend:<VWIDE>
3908 (vec_select:<VHALF>
3909 (match_operand:VQ_HSI 2 "register_operand" "w")
3910 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3911 (sign_extend:<VWIDE>
3912 (vec_duplicate:<VHALF>
3913 (vec_select:<VEL>
3914 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3915 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3916 ))))
3917 (const_int 1))))]
3918 "TARGET_SIMD"
3919 {
3920 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3921 return
3922 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3923 }
3924 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3925 )
3926
3927 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3928 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3929 (SBINQOPS:<VWIDE>
3930 (match_operand:<VWIDE> 1 "register_operand" "0")
3931 (ss_ashift:<VWIDE>
3932 (mult:<VWIDE>
3933 (sign_extend:<VWIDE>
3934 (vec_select:<VHALF>
3935 (match_operand:VQ_HSI 2 "register_operand" "w")
3936 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3937 (sign_extend:<VWIDE>
3938 (vec_duplicate:<VHALF>
3939 (vec_select:<VEL>
3940 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3941 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3942 ))))
3943 (const_int 1))))]
3944 "TARGET_SIMD"
3945 {
3946 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3947 return
3948 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3949 }
3950 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3951 )
3952
3953 (define_expand "aarch64_sqdmlal2_lane<mode>"
3954 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3955 (match_operand:<VWIDE> 1 "register_operand" "w")
3956 (match_operand:VQ_HSI 2 "register_operand" "w")
3957 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3958 (match_operand:SI 4 "immediate_operand" "i")]
3959 "TARGET_SIMD"
3960 {
3961 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3962 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3963 operands[2], operands[3],
3964 operands[4], p));
3965 DONE;
3966 })
3967
3968 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3969 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3970 (match_operand:<VWIDE> 1 "register_operand" "w")
3971 (match_operand:VQ_HSI 2 "register_operand" "w")
3972 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3973 (match_operand:SI 4 "immediate_operand" "i")]
3974 "TARGET_SIMD"
3975 {
3976 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3977 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3978 operands[2], operands[3],
3979 operands[4], p));
3980 DONE;
3981 })
3982
3983 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3984 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3985 (match_operand:<VWIDE> 1 "register_operand" "w")
3986 (match_operand:VQ_HSI 2 "register_operand" "w")
3987 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3988 (match_operand:SI 4 "immediate_operand" "i")]
3989 "TARGET_SIMD"
3990 {
3991 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3992 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3993 operands[2], operands[3],
3994 operands[4], p));
3995 DONE;
3996 })
3997
3998 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3999 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4000 (match_operand:<VWIDE> 1 "register_operand" "w")
4001 (match_operand:VQ_HSI 2 "register_operand" "w")
4002 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4003 (match_operand:SI 4 "immediate_operand" "i")]
4004 "TARGET_SIMD"
4005 {
4006 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4007 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4008 operands[2], operands[3],
4009 operands[4], p));
4010 DONE;
4011 })
4012
4013 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4014 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4015 (SBINQOPS:<VWIDE>
4016 (match_operand:<VWIDE> 1 "register_operand" "0")
4017 (ss_ashift:<VWIDE>
4018 (mult:<VWIDE>
4019 (sign_extend:<VWIDE>
4020 (vec_select:<VHALF>
4021 (match_operand:VQ_HSI 2 "register_operand" "w")
4022 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4023 (sign_extend:<VWIDE>
4024 (vec_duplicate:<VHALF>
4025 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4026 (const_int 1))))]
4027 "TARGET_SIMD"
4028 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4029 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4030 )
4031
4032 (define_expand "aarch64_sqdmlal2_n<mode>"
4033 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4034 (match_operand:<VWIDE> 1 "register_operand" "w")
4035 (match_operand:VQ_HSI 2 "register_operand" "w")
4036 (match_operand:<VEL> 3 "register_operand" "w")]
4037 "TARGET_SIMD"
4038 {
4039 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4040 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4041 operands[2], operands[3],
4042 p));
4043 DONE;
4044 })
4045
4046 (define_expand "aarch64_sqdmlsl2_n<mode>"
4047 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4048 (match_operand:<VWIDE> 1 "register_operand" "w")
4049 (match_operand:VQ_HSI 2 "register_operand" "w")
4050 (match_operand:<VEL> 3 "register_operand" "w")]
4051 "TARGET_SIMD"
4052 {
4053 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4054 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4055 operands[2], operands[3],
4056 p));
4057 DONE;
4058 })
4059
4060 ;; vqdmull
4061
4062 (define_insn "aarch64_sqdmull<mode>"
4063 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4064 (ss_ashift:<VWIDE>
4065 (mult:<VWIDE>
4066 (sign_extend:<VWIDE>
4067 (match_operand:VSD_HSI 1 "register_operand" "w"))
4068 (sign_extend:<VWIDE>
4069 (match_operand:VSD_HSI 2 "register_operand" "w")))
4070 (const_int 1)))]
4071 "TARGET_SIMD"
4072 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4073 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4074 )
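
;; For example, vqdmull_s16 maps here: each 32-bit result lane is
;; sat (2 * (int32) a[i] * (int32) b[i]), where saturation can only
;; trigger for the INT16_MIN * INT16_MIN case.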
4075
4076 ;; vqdmull_lane
4077
4078 (define_insn "aarch64_sqdmull_lane<mode>"
4079 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4080 (ss_ashift:<VWIDE>
4081 (mult:<VWIDE>
4082 (sign_extend:<VWIDE>
4083 (match_operand:VD_HSI 1 "register_operand" "w"))
4084 (sign_extend:<VWIDE>
4085 (vec_duplicate:VD_HSI
4086 (vec_select:<VEL>
4087 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4088 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4089 ))
4090 (const_int 1)))]
4091 "TARGET_SIMD"
4092 {
4093 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4094 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4095 }
4096 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4097 )
4098
4099 (define_insn "aarch64_sqdmull_laneq<mode>"
4100 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4101 (ss_ashift:<VWIDE>
4102 (mult:<VWIDE>
4103 (sign_extend:<VWIDE>
4104 (match_operand:VD_HSI 1 "register_operand" "w"))
4105 (sign_extend:<VWIDE>
4106 (vec_duplicate:VD_HSI
4107 (vec_select:<VEL>
4108 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4109 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4110 ))
4111 (const_int 1)))]
4112 "TARGET_SIMD"
4113 {
4114 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4115 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4116 }
4117 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4118 )
4119
4120 (define_insn "aarch64_sqdmull_lane<mode>"
4121 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4122 (ss_ashift:<VWIDE>
4123 (mult:<VWIDE>
4124 (sign_extend:<VWIDE>
4125 (match_operand:SD_HSI 1 "register_operand" "w"))
4126 (sign_extend:<VWIDE>
4127 (vec_select:<VEL>
4128 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4129 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4130 ))
4131 (const_int 1)))]
4132 "TARGET_SIMD"
4133 {
4134 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4135 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4136 }
4137 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4138 )
4139
4140 (define_insn "aarch64_sqdmull_laneq<mode>"
4141 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4142 (ss_ashift:<VWIDE>
4143 (mult:<VWIDE>
4144 (sign_extend:<VWIDE>
4145 (match_operand:SD_HSI 1 "register_operand" "w"))
4146 (sign_extend:<VWIDE>
4147 (vec_select:<VEL>
4148 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4149 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4150 ))
4151 (const_int 1)))]
4152 "TARGET_SIMD"
4153 {
4154 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4155 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4156 }
4157 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4158 )
4159
4160 ;; vqdmull_n
4161
4162 (define_insn "aarch64_sqdmull_n<mode>"
4163 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4164 (ss_ashift:<VWIDE>
4165 (mult:<VWIDE>
4166 (sign_extend:<VWIDE>
4167 (match_operand:VD_HSI 1 "register_operand" "w"))
4168 (sign_extend:<VWIDE>
4169 (vec_duplicate:VD_HSI
4170 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4171 )
4172 (const_int 1)))]
4173 "TARGET_SIMD"
4174 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4175 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4176 )
4177
4178 ;; vqdmull2
4179
4180
4182 (define_insn "aarch64_sqdmull2<mode>_internal"
4183 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4184 (ss_ashift:<VWIDE>
4185 (mult:<VWIDE>
4186 (sign_extend:<VWIDE>
4187 (vec_select:<VHALF>
4188 (match_operand:VQ_HSI 1 "register_operand" "w")
4189 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4190 (sign_extend:<VWIDE>
4191 (vec_select:<VHALF>
4192 (match_operand:VQ_HSI 2 "register_operand" "w")
4193 (match_dup 3)))
4194 )
4195 (const_int 1)))]
4196 "TARGET_SIMD"
4197 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4198 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4199 )
4200
4201 (define_expand "aarch64_sqdmull2<mode>"
4202 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4203 (match_operand:VQ_HSI 1 "register_operand" "w")
4204 (match_operand:VQ_HSI 2 "register_operand" "w")]
4205 "TARGET_SIMD"
4206 {
4207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4208 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4209 operands[2], p));
4210 DONE;
4211 })
4212
4213 ;; vqdmull2_lane
4214
4215 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4216 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4217 (ss_ashift:<VWIDE>
4218 (mult:<VWIDE>
4219 (sign_extend:<VWIDE>
4220 (vec_select:<VHALF>
4221 (match_operand:VQ_HSI 1 "register_operand" "w")
4222 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4223 (sign_extend:<VWIDE>
4224 (vec_duplicate:<VHALF>
4225 (vec_select:<VEL>
4226 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4227 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4228 ))
4229 (const_int 1)))]
4230 "TARGET_SIMD"
4231 {
4232 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4233 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4234 }
4235 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4236 )
4237
4238 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4239 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4240 (ss_ashift:<VWIDE>
4241 (mult:<VWIDE>
4242 (sign_extend:<VWIDE>
4243 (vec_select:<VHALF>
4244 (match_operand:VQ_HSI 1 "register_operand" "w")
4245 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4246 (sign_extend:<VWIDE>
4247 (vec_duplicate:<VHALF>
4248 (vec_select:<VEL>
4249 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4250 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4251 ))
4252 (const_int 1)))]
4253 "TARGET_SIMD"
4254 {
4255 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4256 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4257 }
4258 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4259 )
4260
4261 (define_expand "aarch64_sqdmull2_lane<mode>"
4262 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4263 (match_operand:VQ_HSI 1 "register_operand" "w")
4264 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4265 (match_operand:SI 3 "immediate_operand" "i")]
4266 "TARGET_SIMD"
4267 {
4268 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4269 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4270 operands[2], operands[3],
4271 p));
4272 DONE;
4273 })
4274
4275 (define_expand "aarch64_sqdmull2_laneq<mode>"
4276 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4277 (match_operand:VQ_HSI 1 "register_operand" "w")
4278 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4279 (match_operand:SI 3 "immediate_operand" "i")]
4280 "TARGET_SIMD"
4281 {
4282 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4283 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4284 operands[2], operands[3],
4285 p));
4286 DONE;
4287 })
4288
4289 ;; vqdmull2_n
4290
4291 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4292 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4293 (ss_ashift:<VWIDE>
4294 (mult:<VWIDE>
4295 (sign_extend:<VWIDE>
4296 (vec_select:<VHALF>
4297 (match_operand:VQ_HSI 1 "register_operand" "w")
4298 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4299 (sign_extend:<VWIDE>
4300 (vec_duplicate:<VHALF>
4301 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4302 )
4303 (const_int 1)))]
4304 "TARGET_SIMD"
4305 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4306 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4307 )
4308
4309 (define_expand "aarch64_sqdmull2_n<mode>"
4310 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4311 (match_operand:VQ_HSI 1 "register_operand" "w")
4312 (match_operand:<VEL> 2 "register_operand" "w")]
4313 "TARGET_SIMD"
4314 {
4315 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4316 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4317 operands[2], p));
4318 DONE;
4319 })
4320
4321 ;; vshl
4322
4323 (define_insn "aarch64_<sur>shl<mode>"
4324 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4325 (unspec:VSDQ_I_DI
4326 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4327 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4328 VSHL))]
4329 "TARGET_SIMD"
4330 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4331 [(set_attr "type" "neon_shift_reg<q>")]
4332 )
4333
4335 ;; vqshl
4336
4337 (define_insn "aarch64_<sur>q<r>shl<mode>"
4338 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4339 (unspec:VSDQ_I
4340 [(match_operand:VSDQ_I 1 "register_operand" "w")
4341 (match_operand:VSDQ_I 2 "register_operand" "w")]
4342 VQSHL))]
4343 "TARGET_SIMD"
4344 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4345 [(set_attr "type" "neon_sat_shift_reg<q>")]
4346 )
4347
4348 ;; vshll_n
4349
4350 (define_insn "aarch64_<sur>shll_n<mode>"
4351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4352 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4353 (match_operand:SI 2
4354 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4355 VSHLL))]
4356 "TARGET_SIMD"
4357 {
4358 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4359 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4360 else
4361 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4362 }
4363 [(set_attr "type" "neon_shift_imm_long")]
4364 )
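
;; The special case above exists because sshll/ushll can only encode
;; shift amounts smaller than the element width; a shift equal to the
;; element width is the separate SHLL instruction, e.g.
;;   shll v0.8h, v1.8b, #8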
4365
4366 ;; vshll_high_n
4367
4368 (define_insn "aarch64_<sur>shll2_n<mode>"
4369 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4370 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4371 (match_operand:SI 2 "immediate_operand" "i")]
4372 VSHLL))]
4373 "TARGET_SIMD"
4374 {
4375 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4376 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4377 else
4378 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4379 }
4380 [(set_attr "type" "neon_shift_imm_long")]
4381 )
4382
4383 ;; vrshr_n
4384
4385 (define_insn "aarch64_<sur>shr_n<mode>"
4386 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4387 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4388 (match_operand:SI 2
4389 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4390 VRSHR_N))]
4391 "TARGET_SIMD"
4392 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4393 [(set_attr "type" "neon_sat_shift_imm<q>")]
4394 )
4395
4396 ;; v(r)sra_n
4397
4398 (define_insn "aarch64_<sur>sra_n<mode>"
4399 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4400 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4401 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4402 (match_operand:SI 3
4403 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4404 VSRA))]
4405 "TARGET_SIMD"
4406 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4407 [(set_attr "type" "neon_shift_acc<q>")]
4408 )
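
;; For illustration, vrsra_n_u32 (acc, x, 2) maps here and computes
;; acc[i] + ((x[i] + 2) >> 2) per lane, the +2 being the rounding
;; constant 1 << (shift - 1).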
4409
4410 ;; vs<lr>i_n
4411
4412 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4413 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4414 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4415 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4416 (match_operand:SI 3
4417 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4418 VSLRI))]
4419 "TARGET_SIMD"
4420 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4421 [(set_attr "type" "neon_shift_imm<q>")]
4422 )
4423
4424 ;; vqshl(u)
4425
4426 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4427 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4428 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4429 (match_operand:SI 2
4430 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4431 VQSHL_N))]
4432 "TARGET_SIMD"
4433 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4434 [(set_attr "type" "neon_sat_shift_imm<q>")]
4435 )
4436
4438 ;; vq(r)shr(u)n_n
4439
4440 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4441 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4442 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4443 (match_operand:SI 2
4444 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4445 VQSHRN_N))]
4446 "TARGET_SIMD"
4447 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4448 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4449 )
4450
4452 ;; cm(eq|ge|gt|lt|le)
4453 ;; Note: we have constraints for both Dz and Z, as different expanders
4454 ;; have different ideas of what should be passed to this pattern.
4455
4456 (define_insn "aarch64_cm<optab><mode>"
4457 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4458 (neg:<V_INT_EQUIV>
4459 (COMPARISONS:<V_INT_EQUIV>
4460 (match_operand:VDQ_I 1 "register_operand" "w,w")
4461 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4462 )))]
4463 "TARGET_SIMD"
4464 "@
4465 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4466 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4467 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4468 )
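
;; For illustration, a V4SImode "lt" comparison uses <cmp_1>/<cmp_2> to
;; swap the operands, emitting either
;;   cmgt v0.4s, v2.4s, v1.4s
;; or, when operand 2 is the zero that ZDz accepts,
;;   cmlt v0.4s, v1.4s, #0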
4469
4470 (define_insn_and_split "aarch64_cm<optab>di"
4471 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4472 (neg:DI
4473 (COMPARISONS:DI
4474 (match_operand:DI 1 "register_operand" "w,w,r")
4475 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4476 )))
4477 (clobber (reg:CC CC_REGNUM))]
4478 "TARGET_SIMD"
4479 "#"
4480 "&& reload_completed"
4481 [(set (match_operand:DI 0 "register_operand")
4482 (neg:DI
4483 (COMPARISONS:DI
4484 (match_operand:DI 1 "register_operand")
4485 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4486 )))]
4487 {
4488 /* If we are in the general purpose register file,
4489 we split to a sequence of comparison and store. */
4490 if (GP_REGNUM_P (REGNO (operands[0]))
4491 && GP_REGNUM_P (REGNO (operands[1])))
4492 {
4493 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4494 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4495 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4496 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4497 DONE;
4498 }
4499 /* Otherwise, we expand to a similar pattern which does not
4500 clobber CC_REGNUM. */
4501 }
4502 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4503 )
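
;; When the result does end up in the general register file, the split
;; above becomes, roughly, a compare plus an inverted conditional-set
;; mask:
;;   cmp   x1, x2
;;   csetm x0, ge
;; rather than forcing the operands across to the vector registers.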
4504
4505 (define_insn "*aarch64_cm<optab>di"
4506 [(set (match_operand:DI 0 "register_operand" "=w,w")
4507 (neg:DI
4508 (COMPARISONS:DI
4509 (match_operand:DI 1 "register_operand" "w,w")
4510 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4511 )))]
4512 "TARGET_SIMD && reload_completed"
4513 "@
4514 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4515 cm<optab>\t%d0, %d1, #0"
4516 [(set_attr "type" "neon_compare, neon_compare_zero")]
4517 )
4518
4519 ;; cm(hs|hi)
4520
4521 (define_insn "aarch64_cm<optab><mode>"
4522 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4523 (neg:<V_INT_EQUIV>
4524 (UCOMPARISONS:<V_INT_EQUIV>
4525 (match_operand:VDQ_I 1 "register_operand" "w")
4526 (match_operand:VDQ_I 2 "register_operand" "w")
4527 )))]
4528 "TARGET_SIMD"
4529 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4530 [(set_attr "type" "neon_compare<q>")]
4531 )
4532
4533 (define_insn_and_split "aarch64_cm<optab>di"
4534 [(set (match_operand:DI 0 "register_operand" "=w,r")
4535 (neg:DI
4536 (UCOMPARISONS:DI
4537 (match_operand:DI 1 "register_operand" "w,r")
4538 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4539 )))
4540 (clobber (reg:CC CC_REGNUM))]
4541 "TARGET_SIMD"
4542 "#"
4543 "&& reload_completed"
4544 [(set (match_operand:DI 0 "register_operand")
4545 (neg:DI
4546 (UCOMPARISONS:DI
4547 (match_operand:DI 1 "register_operand")
4548 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4549 )))]
4550 {
4551 /* If we are in the general purpose register file,
4552 we split to a sequence of comparison and store. */
4553 if (GP_REGNUM_P (REGNO (operands[0]))
4554 && GP_REGNUM_P (REGNO (operands[1])))
4555 {
4556 machine_mode mode = CCmode;
4557 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4558 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4559 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4560 DONE;
4561 }
4562 /* Otherwise, we expand to a similar pattern which does not
4563 clobber CC_REGNUM. */
4564 }
4565 [(set_attr "type" "neon_compare,multiple")]
4566 )
4567
4568 (define_insn "*aarch64_cm<optab>di"
4569 [(set (match_operand:DI 0 "register_operand" "=w")
4570 (neg:DI
4571 (UCOMPARISONS:DI
4572 (match_operand:DI 1 "register_operand" "w")
4573 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4574 )))]
4575 "TARGET_SIMD && reload_completed"
4576 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4577 [(set_attr "type" "neon_compare")]
4578 )
4579
4580 ;; cmtst
4581
4582 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4583 ;; we don't have any insns using ne, and aarch64_vcond outputs
4584 ;; not (neg (eq (and x y) 0))
4585 ;; which is rewritten by simplify_rtx as
4586 ;; plus (eq (and x y) 0) -1.
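;; The rewrite is just the two's-complement identity ~(-e) == e - 1;
;; checking per lane, with eq producing 0/1 before the neg: where
;; (x & y) == 0, eq gives 1 and 1 - 1 = 0; where (x & y) != 0, eq gives
;; 0 and 0 - 1 = -1 (all ones), exactly what cmtst must produce.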
4587
4588 (define_insn "aarch64_cmtst<mode>"
4589 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4590 (plus:<V_INT_EQUIV>
4591 (eq:<V_INT_EQUIV>
4592 (and:VDQ_I
4593 (match_operand:VDQ_I 1 "register_operand" "w")
4594 (match_operand:VDQ_I 2 "register_operand" "w"))
4595 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4596 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4597 ]
4598 "TARGET_SIMD"
4599 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4600 [(set_attr "type" "neon_tst<q>")]
4601 )
4602
4603 (define_insn_and_split "aarch64_cmtstdi"
4604 [(set (match_operand:DI 0 "register_operand" "=w,r")
4605 (neg:DI
4606 (ne:DI
4607 (and:DI
4608 (match_operand:DI 1 "register_operand" "w,r")
4609 (match_operand:DI 2 "register_operand" "w,r"))
4610 (const_int 0))))
4611 (clobber (reg:CC CC_REGNUM))]
4612 "TARGET_SIMD"
4613 "#"
4614 "&& reload_completed"
4615 [(set (match_operand:DI 0 "register_operand")
4616 (neg:DI
4617 (ne:DI
4618 (and:DI
4619 (match_operand:DI 1 "register_operand")
4620 (match_operand:DI 2 "register_operand"))
4621 (const_int 0))))]
4622 {
4623 /* If we are in the general purpose register file,
4624 we split to a sequence of comparison and store. */
4625 if (GP_REGNUM_P (REGNO (operands[0]))
4626 && GP_REGNUM_P (REGNO (operands[1])))
4627 {
4628 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4629 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4630 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4631 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4632 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4633 DONE;
4634 }
4635 /* Otherwise, we expand to a similar pattern which does not
4636 clobber CC_REGNUM. */
4637 }
4638 [(set_attr "type" "neon_tst,multiple")]
4639 )
4640
4641 (define_insn "*aarch64_cmtstdi"
4642 [(set (match_operand:DI 0 "register_operand" "=w")
4643 (neg:DI
4644 (ne:DI
4645 (and:DI
4646 (match_operand:DI 1 "register_operand" "w")
4647 (match_operand:DI 2 "register_operand" "w"))
4648 (const_int 0))))]
4649 "TARGET_SIMD"
4650 "cmtst\t%d0, %d1, %d2"
4651 [(set_attr "type" "neon_tst")]
4652 )
4653
4654 ;; fcm(eq|ge|gt|le|lt)
4655
4656 (define_insn "aarch64_cm<optab><mode>"
4657 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4658 (neg:<V_INT_EQUIV>
4659 (COMPARISONS:<V_INT_EQUIV>
4660 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4661 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4662 )))]
4663 "TARGET_SIMD"
4664 "@
4665 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4666 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4667 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4668 )
4669
4670 ;; fac(ge|gt)
4671 ;; Note that we can also handle what would be fac(le|lt) by swapping
4672 ;; the operands and generating fac(ge|gt) instead.
4673
4674 (define_insn "aarch64_fac<optab><mode>"
4675 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4676 (neg:<V_INT_EQUIV>
4677 (FAC_COMPARISONS:<V_INT_EQUIV>
4678 (abs:VHSDF_HSDF
4679 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4680 (abs:VHSDF_HSDF
4681 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4682 )))]
4683 "TARGET_SIMD"
4684 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4685 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4686 )
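
;; e.g. vcaltq_f32 (a, b) tests |a| < |b| and is emitted as
;;   facgt v0.4s, v2.4s, v1.4s
;; with the operand swap done by <cmp_1>/<cmp_2>.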
4687
4688 ;; addp
4689
4690 (define_insn "aarch64_addp<mode>"
4691 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4692 (unspec:VD_BHSI
4693 [(match_operand:VD_BHSI 1 "register_operand" "w")
4694 (match_operand:VD_BHSI 2 "register_operand" "w")]
4695 UNSPEC_ADDP))]
4696 "TARGET_SIMD"
4697 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4698 [(set_attr "type" "neon_reduc_add<q>")]
4699 )
4700
4701 (define_insn "aarch64_addpdi"
4702 [(set (match_operand:DI 0 "register_operand" "=w")
4703 (unspec:DI
4704 [(match_operand:V2DI 1 "register_operand" "w")]
4705 UNSPEC_ADDP))]
4706 "TARGET_SIMD"
4707 "addp\t%d0, %1.2d"
4708 [(set_attr "type" "neon_reduc_add")]
4709 )
4710
4711 ;; sqrt
4712
4713 (define_expand "sqrt<mode>2"
4714 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4715 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4716 "TARGET_SIMD"
4717 {
4718 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4719 DONE;
4720 })
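
;; Note that aarch64_emit_approx_sqrt only succeeds when the tuning and
;; the -ffast-math style flags permit the frsqrte/frsqrts
;; Newton-Raphson sequence; otherwise it returns false and the plain
;; fsqrt insn below is matched.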
4721
4722 (define_insn "*sqrt<mode>2"
4723 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4724 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4725 "TARGET_SIMD"
4726 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4727 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4728 )
4729
4730 ;; Patterns for vector struct loads and stores.
4731
4732 (define_insn "aarch64_simd_ld2<mode>"
4733 [(set (match_operand:OI 0 "register_operand" "=w")
4734 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4735 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4736 UNSPEC_LD2))]
4737 "TARGET_SIMD"
4738 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4739 [(set_attr "type" "neon_load2_2reg<q>")]
4740 )
4741
4742 (define_insn "aarch64_simd_ld2r<mode>"
4743 [(set (match_operand:OI 0 "register_operand" "=w")
4744 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4745 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4746 UNSPEC_LD2_DUP))]
4747 "TARGET_SIMD"
4748 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4749 [(set_attr "type" "neon_load2_all_lanes<q>")]
4750 )
4751
4752 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4753 [(set (match_operand:OI 0 "register_operand" "=w")
4754 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4755 (match_operand:OI 2 "register_operand" "0")
4756 (match_operand:SI 3 "immediate_operand" "i")
4757 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4758 UNSPEC_LD2_LANE))]
4759 "TARGET_SIMD"
4760 {
4761 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4762 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4763 }
4764 [(set_attr "type" "neon_load2_one_lane")]
4765 )
4766
4767 (define_expand "vec_load_lanesoi<mode>"
4768 [(set (match_operand:OI 0 "register_operand" "=w")
4769 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4770 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4771 UNSPEC_LD2))]
4772 "TARGET_SIMD"
4773 {
4774 if (BYTES_BIG_ENDIAN)
4775 {
4776 rtx tmp = gen_reg_rtx (OImode);
4777 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4778 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4779 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4780 }
4781 else
4782 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4783 DONE;
4784 })
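
;; A sketch of the big-endian path above: ld2 deinterleaves into
;; architectural lane order, which on big-endian is the reverse of
;; GCC's vector extension lane numbering, so the loaded pair is passed
;; through aarch64_rev_reglistoi with a reversal mask; little-endian
;; needs no fix-up.  The st2/ld3/st3/ld4/st4 expanders below mirror
;; this.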
4785
4786 (define_insn "aarch64_simd_st2<mode>"
4787 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4788 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4789 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4790 UNSPEC_ST2))]
4791 "TARGET_SIMD"
4792 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4793 [(set_attr "type" "neon_store2_2reg<q>")]
4794 )
4795
4796 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4797 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4798 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4799 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4800 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4801 (match_operand:SI 2 "immediate_operand" "i")]
4802 UNSPEC_ST2_LANE))]
4803 "TARGET_SIMD"
4804 {
4805 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4806 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4807 }
4808 [(set_attr "type" "neon_store2_one_lane<q>")]
4809 )
4810
4811 (define_expand "vec_store_lanesoi<mode>"
4812 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4813 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4814 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4815 UNSPEC_ST2))]
4816 "TARGET_SIMD"
4817 {
4818 if (BYTES_BIG_ENDIAN)
4819 {
4820 rtx tmp = gen_reg_rtx (OImode);
4821 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4822 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4823 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4824 }
4825 else
4826 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4827 DONE;
4828 })
4829
4830 (define_insn "aarch64_simd_ld3<mode>"
4831 [(set (match_operand:CI 0 "register_operand" "=w")
4832 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4833 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4834 UNSPEC_LD3))]
4835 "TARGET_SIMD"
4836 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4837 [(set_attr "type" "neon_load3_3reg<q>")]
4838 )
4839
4840 (define_insn "aarch64_simd_ld3r<mode>"
4841 [(set (match_operand:CI 0 "register_operand" "=w")
4842 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4843 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4844 UNSPEC_LD3_DUP))]
4845 "TARGET_SIMD"
4846 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4847 [(set_attr "type" "neon_load3_all_lanes<q>")]
4848 )
4849
4850 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4851 [(set (match_operand:CI 0 "register_operand" "=w")
4852 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4853 (match_operand:CI 2 "register_operand" "0")
4854 (match_operand:SI 3 "immediate_operand" "i")
4855 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4856 UNSPEC_LD3_LANE))]
4857 "TARGET_SIMD"
4858 {
4859 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4860 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4861 }
4862 [(set_attr "type" "neon_load3_one_lane")]
4863 )
4864
4865 (define_expand "vec_load_lanesci<mode>"
4866 [(set (match_operand:CI 0 "register_operand" "=w")
4867 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4868 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4869 UNSPEC_LD3))]
4870 "TARGET_SIMD"
4871 {
4872 if (BYTES_BIG_ENDIAN)
4873 {
4874 rtx tmp = gen_reg_rtx (CImode);
4875 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4876 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4877 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4878 }
4879 else
4880 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4881 DONE;
4882 })
4883
4884 (define_insn "aarch64_simd_st3<mode>"
4885 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4886 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4887 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4888 UNSPEC_ST3))]
4889 "TARGET_SIMD"
4890 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4891 [(set_attr "type" "neon_store3_3reg<q>")]
4892 )
4893
4894 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4895 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4896 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4897 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4898 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4899 (match_operand:SI 2 "immediate_operand" "i")]
4900 UNSPEC_ST3_LANE))]
4901 "TARGET_SIMD"
4902 {
4903 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4904 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4905 }
4906 [(set_attr "type" "neon_store3_one_lane<q>")]
4907 )
4908
4909 (define_expand "vec_store_lanesci<mode>"
4910 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4911 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4912 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4913 UNSPEC_ST3))]
4914 "TARGET_SIMD"
4915 {
4916 if (BYTES_BIG_ENDIAN)
4917 {
4918 rtx tmp = gen_reg_rtx (CImode);
4919 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4920 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4921 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4922 }
4923 else
4924 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4925 DONE;
4926 })
4927
4928 (define_insn "aarch64_simd_ld4<mode>"
4929 [(set (match_operand:XI 0 "register_operand" "=w")
4930 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4931 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4932 UNSPEC_LD4))]
4933 "TARGET_SIMD"
4934 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4935 [(set_attr "type" "neon_load4_4reg<q>")]
4936 )
4937
4938 (define_insn "aarch64_simd_ld4r<mode>"
4939 [(set (match_operand:XI 0 "register_operand" "=w")
4940 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4941 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4942 UNSPEC_LD4_DUP))]
4943 "TARGET_SIMD"
4944 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4945 [(set_attr "type" "neon_load4_all_lanes<q>")]
4946 )
4947
4948 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4949 [(set (match_operand:XI 0 "register_operand" "=w")
4950 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4951 (match_operand:XI 2 "register_operand" "0")
4952 (match_operand:SI 3 "immediate_operand" "i")
4953 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4954 UNSPEC_LD4_LANE))]
4955 "TARGET_SIMD"
4956 {
4957 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4958 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4959 }
4960 [(set_attr "type" "neon_load4_one_lane")]
4961 )
4962
4963 (define_expand "vec_load_lanesxi<mode>"
4964 [(set (match_operand:XI 0 "register_operand" "=w")
4965 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4966 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4967 UNSPEC_LD4))]
4968 "TARGET_SIMD"
4969 {
4970 if (BYTES_BIG_ENDIAN)
4971 {
4972 rtx tmp = gen_reg_rtx (XImode);
4973 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4974 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4975 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4976 }
4977 else
4978 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4979 DONE;
4980 })
4981
4982 (define_insn "aarch64_simd_st4<mode>"
4983 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4984 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4985 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4986 UNSPEC_ST4))]
4987 "TARGET_SIMD"
4988 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4989 [(set_attr "type" "neon_store4_4reg<q>")]
4990 )
4991
4992 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4993 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4994 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4995 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4996 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4997 (match_operand:SI 2 "immediate_operand" "i")]
4998 UNSPEC_ST4_LANE))]
4999 "TARGET_SIMD"
5000 {
5001 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5002 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5003 }
5004 [(set_attr "type" "neon_store4_one_lane<q>")]
5005 )
5006
5007 (define_expand "vec_store_lanesxi<mode>"
5008 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5009 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5010 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5011 UNSPEC_ST4))]
5012 "TARGET_SIMD"
5013 {
5014 if (BYTES_BIG_ENDIAN)
5015 {
5016 rtx tmp = gen_reg_rtx (XImode);
5017 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5018 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5019 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5020 }
5021 else
5022 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5023 DONE;
5024 })
5025
5026 (define_insn_and_split "aarch64_rev_reglist<mode>"
5027 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5028 (unspec:VSTRUCT
5029 [(match_operand:VSTRUCT 1 "register_operand" "w")
5030 (match_operand:V16QI 2 "register_operand" "w")]
5031 UNSPEC_REV_REGLIST))]
5032 "TARGET_SIMD"
5033 "#"
5034 "&& reload_completed"
5035 [(const_int 0)]
5036 {
5037 int i;
5038 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5039 for (i = 0; i < nregs; i++)
5040 {
5041 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5042 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5043 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5044 }
5045 DONE;
5046 }
5047 [(set_attr "type" "neon_tbl1_q")
5048 (set_attr "length" "<insn_count>")]
5049 )
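
;; As a sketch (register numbers are illustrative), reversing a CI-mode
;; list splits after reload into one tbl per 128-bit register:
;;   tbl v0.16b, {v4.16b}, v7.16b
;;   tbl v1.16b, {v5.16b}, v7.16b
;;   tbl v2.16b, {v6.16b}, v7.16b
;; where v7 holds the byte mask built by aarch64_reverse_mask; the
;; early-clobber "=&w" keeps the destination list clear of the sources.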
5050
5051 ;; Reload patterns for AdvSIMD register list operands.
5052
5053 (define_expand "mov<mode>"
5054 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5055 (match_operand:VSTRUCT 1 "general_operand" ""))]
5056 "TARGET_SIMD"
5057 {
5058 if (can_create_pseudo_p ())
5059 {
5060 if (GET_CODE (operands[0]) != REG)
5061 operands[1] = force_reg (<MODE>mode, operands[1]);
5062 }
5063
5064 /* If we have a paradoxical subreg trying to write to <MODE> from a
5065 narrower mode, and the registers don't overlap, then we need to break
5066 it apart. What it's trying to do is give two kinds of information at
5067 the same time. It's trying to convey liveness information by saying
5068 that the entire register will be written to eventually, but it also
5069 only wants to write a single part of the register. Hence the
5070 paradoxical subreg.
5071
5072 Instead of allowing this we split the two concerns. The liveness
5073 information is conveyed using a clobber, and the paradoxical subreg is
5074 broken apart into a normal write of just the part it wanted to write. */
5075
5076 if (REG_P (operands[0]) && paradoxical_subreg_p (operands[1]))
5077 {
5078 if (!reg_overlap_mentioned_p (operands[0], operands[1]))
5079 emit_clobber (operands[0]);
5080 operands[1] = SUBREG_REG (operands[1]);
5081 operands[0] = gen_lowpart (GET_MODE (operands[1]), operands[0]);
5082 }
5083 })
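
;; A minimal sketch of the rewrite above (register numbers are
;; illustrative): a move such as
;;   (set (reg:OI 32) (subreg:OI (reg:V2DI 36) 0))
;; with non-overlapping registers becomes
;;   (clobber (reg:OI 32))
;;   (set (subreg:V2DI (reg:OI 32) 0) (reg:V2DI 36))
;; so the liveness claim lives in the clobber and the copy is narrowed
;; to the part that was actually being written.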
5084
5085
5086 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5087 [(match_operand:CI 0 "register_operand" "=w")
5088 (match_operand:DI 1 "register_operand" "r")
5089 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5090 "TARGET_SIMD"
5091 {
5092 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5093 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5094 DONE;
5095 })
5096
5097 (define_insn "aarch64_ld1_x3_<mode>"
5098 [(set (match_operand:CI 0 "register_operand" "=w")
5099 (unspec:CI
5100 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5101 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5102 "TARGET_SIMD"
5103 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5104 [(set_attr "type" "neon_load1_3reg<q>")]
5105 )
5106
5107 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5108 [(match_operand:XI 0 "register_operand" "=w")
5109 (match_operand:DI 1 "register_operand" "r")
5110 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5111 "TARGET_SIMD"
5112 {
5113 rtx mem = gen_rtx_MEM (XImode, operands[1]);
5114 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5115 DONE;
5116 })
5117
5118 (define_insn "aarch64_ld1_x4_<mode>"
5119 [(set (match_operand:XI 0 "register_operand" "=w")
5120 (unspec:XI
5121 [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5122 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5123 UNSPEC_LD1))]
5124 "TARGET_SIMD"
5125 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5126 [(set_attr "type" "neon_load1_4reg<q>")]
5127 )
5128
5129 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5130 [(match_operand:DI 0 "register_operand" "")
5131 (match_operand:OI 1 "register_operand" "")
5132 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5133 "TARGET_SIMD"
5134 {
5135 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5136 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5137 DONE;
5138 })
5139
5140 (define_insn "aarch64_st1_x2_<mode>"
5141 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5142 (unspec:OI
5143 [(match_operand:OI 1 "register_operand" "w")
5144 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5145 "TARGET_SIMD"
5146 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5147 [(set_attr "type" "neon_store1_2reg<q>")]
5148 )
5149
5150 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5151 [(match_operand:DI 0 "register_operand" "")
5152 (match_operand:CI 1 "register_operand" "")
5153 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5154 "TARGET_SIMD"
5155 {
5156 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5157 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5158 DONE;
5159 })
5160
5161 (define_insn "aarch64_st1_x3_<mode>"
5162 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5163 (unspec:CI
5164 [(match_operand:CI 1 "register_operand" "w")
5165 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5166 "TARGET_SIMD"
5167 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5168 [(set_attr "type" "neon_store1_3reg<q>")]
5169 )
5170
5171 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5172 [(match_operand:DI 0 "register_operand" "")
5173 (match_operand:XI 1 "register_operand" "")
5174 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5175 "TARGET_SIMD"
5176 {
5177 rtx mem = gen_rtx_MEM (XImode, operands[0]);
5178 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5179 DONE;
5180 })
5181
5182 (define_insn "aarch64_st1_x4_<mode>"
5183 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5184 (unspec:XI
5185 [(match_operand:XI 1 "register_operand" "w")
5186 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5187 UNSPEC_ST1))]
5188 "TARGET_SIMD"
5189 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5190 [(set_attr "type" "neon_store1_4reg<q>")]
5191 )
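
;; A hedged usage sketch from the intrinsics side, assuming the
;; arm_neon.h entry points vld1q_u32_x4/vst1q_u32_x4 route through the
;; ld1x4/st1x4 expanders above (register choices are illustrative):
;;   #include <arm_neon.h>
;;   void copy_16_words (uint32_t *dst, const uint32_t *src)
;;   {
;;     uint32x4x4_t t = vld1q_u32_x4 (src);  /* one ld1 {v0.4s - v3.4s}, [x1] */
;;     vst1q_u32_x4 (dst, t);                /* one st1 {v0.4s - v3.4s}, [x0] */
;;   }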
5192
5193 (define_insn "*aarch64_mov<mode>"
5194 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5195 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5196 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5197 && (register_operand (operands[0], <MODE>mode)
5198 || register_operand (operands[1], <MODE>mode))"
5199 "@
5200 #
5201 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5202 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5203 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5204 neon_load<nregs>_<nregs>reg_q")
5205 (set_attr "length" "<insn_count>,4,4")]
5206 )
5207
5208 (define_insn "aarch64_be_ld1<mode>"
5209 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5210 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5211 "aarch64_simd_struct_operand" "Utv")]
5212 UNSPEC_LD1))]
5213 "TARGET_SIMD"
5214 "ld1\\t{%0<Vmtype>}, %1"
5215 [(set_attr "type" "neon_load1_1reg<q>")]
5216 )
5217
5218 (define_insn "aarch64_be_st1<mode>"
5219 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5220 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5221 UNSPEC_ST1))]
5222 "TARGET_SIMD"
5223 "st1\\t{%1<Vmtype>}, %0"
5224 [(set_attr "type" "neon_store1_1reg<q>")]
5225 )
5226
5227 (define_insn "*aarch64_be_movoi"
5228 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5229 (match_operand:OI 1 "general_operand" " w,w,m"))]
5230 "TARGET_SIMD && BYTES_BIG_ENDIAN
5231 && (register_operand (operands[0], OImode)
5232 || register_operand (operands[1], OImode))"
5233 "@
5234 #
5235 stp\\t%q1, %R1, %0
5236 ldp\\t%q0, %R0, %1"
5237 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5238 (set_attr "length" "8,4,4")]
5239 )
5240
5241 (define_insn "*aarch64_be_movci"
5242 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5243 (match_operand:CI 1 "general_operand" " w,w,o"))]
5244 "TARGET_SIMD && BYTES_BIG_ENDIAN
5245 && (register_operand (operands[0], CImode)
5246 || register_operand (operands[1], CImode))"
5247 "#"
5248 [(set_attr "type" "multiple")
5249 (set_attr "length" "12,4,4")]
5250 )
5251
5252 (define_insn "*aarch64_be_movxi"
5253 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5254 (match_operand:XI 1 "general_operand" " w,w,o"))]
5255 "TARGET_SIMD && BYTES_BIG_ENDIAN
5256 && (register_operand (operands[0], XImode)
5257 || register_operand (operands[1], XImode))"
5258 "#"
5259 [(set_attr "type" "multiple")
5260 (set_attr "length" "16,4,4")]
5261 )
5262
5263 (define_split
5264 [(set (match_operand:OI 0 "register_operand")
5265 (match_operand:OI 1 "register_operand"))]
5266 "TARGET_SIMD && reload_completed"
5267 [(const_int 0)]
5268 {
5269 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5270 DONE;
5271 })
5272
5273 (define_split
5274 [(set (match_operand:CI 0 "nonimmediate_operand")
5275 (match_operand:CI 1 "general_operand"))]
5276 "TARGET_SIMD && reload_completed"
5277 [(const_int 0)]
5278 {
5279 if (register_operand (operands[0], CImode)
5280 && register_operand (operands[1], CImode))
5281 {
5282 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5283 DONE;
5284 }
5285 else if (BYTES_BIG_ENDIAN)
5286 {
5287 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5288 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5289 emit_move_insn (gen_lowpart (V16QImode,
5290 simplify_gen_subreg (TImode, operands[0],
5291 CImode, 32)),
5292 gen_lowpart (V16QImode,
5293 simplify_gen_subreg (TImode, operands[1],
5294 CImode, 32)));
5295 DONE;
5296 }
5297 else
5298 FAIL;
5299 })
5300
5301 (define_split
5302 [(set (match_operand:XI 0 "nonimmediate_operand")
5303 (match_operand:XI 1 "general_operand"))]
5304 "TARGET_SIMD && reload_completed"
5305 [(const_int 0)]
5306 {
5307 if (register_operand (operands[0], XImode)
5308 && register_operand (operands[1], XImode))
5309 {
5310 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5311 DONE;
5312 }
5313 else if (BYTES_BIG_ENDIAN)
5314 {
5315 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5316 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5317 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5318 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5319 DONE;
5320 }
5321 else
5322 FAIL;
5323 })
5324
5325 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5326 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5327 (match_operand:DI 1 "register_operand" "r")
5328 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5329 "TARGET_SIMD"
5330 {
5331 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5332 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5333 * <VSTRUCT:nregs>);
5334
5335 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5336 mem));
5337 DONE;
5338 })
5339
5340 (define_insn "aarch64_ld2<mode>_dreg"
5341 [(set (match_operand:OI 0 "register_operand" "=w")
5342 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5343 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5344 UNSPEC_LD2_DREG))]
5345 "TARGET_SIMD"
5346 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5347 [(set_attr "type" "neon_load2_2reg<q>")]
5348 )
5349
5350 (define_insn "aarch64_ld2<mode>_dreg"
5351 [(set (match_operand:OI 0 "register_operand" "=w")
5352 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5353 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5354 UNSPEC_LD2_DREG))]
5355 "TARGET_SIMD"
5356 "ld1\\t{%S0.1d - %T0.1d}, %1"
5357 [(set_attr "type" "neon_load1_2reg<q>")]
5358 )
5359
5360 (define_insn "aarch64_ld3<mode>_dreg"
5361 [(set (match_operand:CI 0 "register_operand" "=w")
5362 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5363 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5364 UNSPEC_LD3_DREG))]
5365 "TARGET_SIMD"
5366 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5367 [(set_attr "type" "neon_load3_3reg<q>")]
5368 )
5369
5370 (define_insn "aarch64_ld3<mode>_dreg"
5371 [(set (match_operand:CI 0 "register_operand" "=w")
5372 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5373 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5374 UNSPEC_LD3_DREG))]
5375 "TARGET_SIMD"
5376 "ld1\\t{%S0.1d - %U0.1d}, %1"
5377 [(set_attr "type" "neon_load1_3reg<q>")]
5378 )
5379
5380 (define_insn "aarch64_ld4<mode>_dreg"
5381 [(set (match_operand:XI 0 "register_operand" "=w")
5382 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5383 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5384 UNSPEC_LD4_DREG))]
5385 "TARGET_SIMD"
5386 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5387 [(set_attr "type" "neon_load4_4reg<q>")]
5388 )
5389
5390 (define_insn "aarch64_ld4<mode>_dreg"
5391 [(set (match_operand:XI 0 "register_operand" "=w")
5392 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5393 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5394 UNSPEC_LD4_DREG))]
5395 "TARGET_SIMD"
5396 "ld1\\t{%S0.1d - %V0.1d}, %1"
5397 [(set_attr "type" "neon_load1_4reg<q>")]
5398 )
5399
5400 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5401 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5402 (match_operand:DI 1 "register_operand" "r")
5403 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5404 "TARGET_SIMD"
5405 {
5406 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5407 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5408
5409 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5410 DONE;
5411 })
5412
5413 (define_expand "aarch64_ld1<VALL_F16:mode>"
5414 [(match_operand:VALL_F16 0 "register_operand")
5415 (match_operand:DI 1 "register_operand")]
5416 "TARGET_SIMD"
5417 {
5418 machine_mode mode = <VALL_F16:MODE>mode;
5419 rtx mem = gen_rtx_MEM (mode, operands[1]);
5420
5421 if (BYTES_BIG_ENDIAN)
5422 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5423 else
5424 emit_move_insn (operands[0], mem);
5425 DONE;
5426 })
5427
5428 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5429 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5430 (match_operand:DI 1 "register_operand" "r")
5431 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5432 "TARGET_SIMD"
5433 {
5434 machine_mode mode = <VSTRUCT:MODE>mode;
5435 rtx mem = gen_rtx_MEM (mode, operands[1]);
5436
5437 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5438 DONE;
5439 })
5440
5441 (define_expand "aarch64_ld1x2<VQ:mode>"
5442 [(match_operand:OI 0 "register_operand" "=w")
5443 (match_operand:DI 1 "register_operand" "r")
5444 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5445 "TARGET_SIMD"
5446 {
5447 machine_mode mode = OImode;
5448 rtx mem = gen_rtx_MEM (mode, operands[1]);
5449
5450 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5451 DONE;
5452 })
5453
5454 (define_expand "aarch64_ld1x2<VDC:mode>"
5455 [(match_operand:OI 0 "register_operand" "=w")
5456 (match_operand:DI 1 "register_operand" "r")
5457 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5458 "TARGET_SIMD"
5459 {
5460 machine_mode mode = OImode;
5461 rtx mem = gen_rtx_MEM (mode, operands[1]);
5462
5463 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5464 DONE;
5465 })
5466
5467
5468 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5469 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5470 (match_operand:DI 1 "register_operand" "r")
5471 (match_operand:VSTRUCT 2 "register_operand" "0")
5472 (match_operand:SI 3 "immediate_operand" "i")
5473 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5474 "TARGET_SIMD"
5475 {
5476 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5477 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5478 * <VSTRUCT:nregs>);
5479
5480 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5481 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5482 operands[0], mem, operands[2], operands[3]));
5483 DONE;
5484 })
5485
5486 ;; Expanders for builtins to extract vector registers from large
5487 ;; opaque integer modes.
5488
5489 ;; D-register list.
5490
5491 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5492 [(match_operand:VDC 0 "register_operand" "=w")
5493 (match_operand:VSTRUCT 1 "register_operand" "w")
5494 (match_operand:SI 2 "immediate_operand" "i")]
5495 "TARGET_SIMD"
5496 {
5497 int part = INTVAL (operands[2]);
5498 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5499 int offset = part * 16;
5500
5501 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5502 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5503 DONE;
5504 })
5505
5506 ;; Q-register list.
5507
5508 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5509 [(match_operand:VQ 0 "register_operand" "=w")
5510 (match_operand:VSTRUCT 1 "register_operand" "w")
5511 (match_operand:SI 2 "immediate_operand" "i")]
5512 "TARGET_SIMD"
5513 {
5514 int part = INTVAL (operands[2]);
5515 int offset = part * 16;
5516
5517 emit_move_insn (operands[0],
5518 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5519 DONE;
5520 })
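
;; Offset sketch for the getters above: element 2 of an int32x4x3_t
;; held in CImode is read at byte offset 2 * 16 = 32, i.e. through
;; (subreg:V4SI (reg:CI ...) 32), which after register allocation is
;; just the third quad register of the list.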
5521
5522 ;; Permuted-store expanders for neon intrinsics.
5523
5524 ;; Permute instructions
5525
5526 ;; vec_perm support
5527
5528 (define_expand "vec_perm<mode>"
5529 [(match_operand:VB 0 "register_operand")
5530 (match_operand:VB 1 "register_operand")
5531 (match_operand:VB 2 "register_operand")
5532 (match_operand:VB 3 "register_operand")]
5533 "TARGET_SIMD"
5534 {
5535 aarch64_expand_vec_perm (operands[0], operands[1],
5536 operands[2], operands[3], <nunits>);
5537 DONE;
5538 })
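
;; Usage sketch: a variable two-input byte shuffle written with the
;; GCC vector extension, e.g.
;;   uint8x16_t f (uint8x16_t a, uint8x16_t b, uint8x16_t sel)
;;   { return __builtin_shuffle (a, b, sel); }
;; reaches this expander; aarch64_expand_vec_perm is then expected to
;; concatenate a and b (aarch64_combinev16qi below) and index the pair
;; with a two-register tbl (aarch64_tbl2v16qi below).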
5539
5540 (define_insn "aarch64_tbl1<mode>"
5541 [(set (match_operand:VB 0 "register_operand" "=w")
5542 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5543 (match_operand:VB 2 "register_operand" "w")]
5544 UNSPEC_TBL))]
5545 "TARGET_SIMD"
5546 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5547 [(set_attr "type" "neon_tbl1<q>")]
5548 )
5549
5550 ;; Two source registers.
5551
5552 (define_insn "aarch64_tbl2v16qi"
5553 [(set (match_operand:V16QI 0 "register_operand" "=w")
5554 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5555 (match_operand:V16QI 2 "register_operand" "w")]
5556 UNSPEC_TBL))]
5557 "TARGET_SIMD"
5558 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5559 [(set_attr "type" "neon_tbl2_q")]
5560 )
5561
5562 (define_insn "aarch64_tbl3<mode>"
5563 [(set (match_operand:VB 0 "register_operand" "=w")
5564 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5565 (match_operand:VB 2 "register_operand" "w")]
5566 UNSPEC_TBL))]
5567 "TARGET_SIMD"
5568 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5569 [(set_attr "type" "neon_tbl3")]
5570 )
5571
5572 (define_insn "aarch64_tbx4<mode>"
5573 [(set (match_operand:VB 0 "register_operand" "=w")
5574 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5575 (match_operand:OI 2 "register_operand" "w")
5576 (match_operand:VB 3 "register_operand" "w")]
5577 UNSPEC_TBX))]
5578 "TARGET_SIMD"
5579 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5580 [(set_attr "type" "neon_tbl4")]
5581 )
5582
5583 ;; Three source registers.
5584
5585 (define_insn "aarch64_qtbl3<mode>"
5586 [(set (match_operand:VB 0 "register_operand" "=w")
5587 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5588 (match_operand:VB 2 "register_operand" "w")]
5589 UNSPEC_TBL))]
5590 "TARGET_SIMD"
5591 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5592 [(set_attr "type" "neon_tbl3")]
5593 )
5594
5595 (define_insn "aarch64_qtbx3<mode>"
5596 [(set (match_operand:VB 0 "register_operand" "=w")
5597 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5598 (match_operand:CI 2 "register_operand" "w")
5599 (match_operand:VB 3 "register_operand" "w")]
5600 UNSPEC_TBX))]
5601 "TARGET_SIMD"
5602 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5603 [(set_attr "type" "neon_tbl3")]
5604 )
5605
5606 ;; Four source registers.
5607
5608 (define_insn "aarch64_qtbl4<mode>"
5609 [(set (match_operand:VB 0 "register_operand" "=w")
5610 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5611 (match_operand:VB 2 "register_operand" "w")]
5612 UNSPEC_TBL))]
5613 "TARGET_SIMD"
5614 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5615 [(set_attr "type" "neon_tbl4")]
5616 )
5617
5618 (define_insn "aarch64_qtbx4<mode>"
5619 [(set (match_operand:VB 0 "register_operand" "=w")
5620 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5621 (match_operand:XI 2 "register_operand" "w")
5622 (match_operand:VB 3 "register_operand" "w")]
5623 UNSPEC_TBX))]
5624 "TARGET_SIMD"
5625 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5626 [(set_attr "type" "neon_tbl4")]
5627 )
5628
5629 (define_insn_and_split "aarch64_combinev16qi"
5630 [(set (match_operand:OI 0 "register_operand" "=w")
5631 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5632 (match_operand:V16QI 2 "register_operand" "w")]
5633 UNSPEC_CONCAT))]
5634 "TARGET_SIMD"
5635 "#"
5636 "&& reload_completed"
5637 [(const_int 0)]
5638 {
5639 aarch64_split_combinev16qi (operands);
5640 DONE;
5641 }
5642 [(set_attr "type" "multiple")]
5643 )
5644
5645 ;; This instruction's pattern is generated directly by
5646 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5647 ;; need corresponding changes there.
5648 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5649 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5650 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5651 (match_operand:VALL_F16 2 "register_operand" "w")]
5652 PERMUTE))]
5653 "TARGET_SIMD"
5654 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5655 [(set_attr "type" "neon_permute<q>")]
5656 )
5657
5658 ;; This instruction's pattern is generated directly by
5659 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5660 ;; need corresponding changes there. Note that the immediate (third)
5661 ;; operand is a lane index not a byte index.
5662 (define_insn "aarch64_ext<mode>"
5663 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5664 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5665 (match_operand:VALL_F16 2 "register_operand" "w")
5666 (match_operand:SI 3 "immediate_operand" "i")]
5667 UNSPEC_EXT))]
5668 "TARGET_SIMD"
5669 {
5670 operands[3] = GEN_INT (INTVAL (operands[3])
5671 * GET_MODE_UNIT_SIZE (<MODE>mode));
5672 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5673 }
5674 [(set_attr "type" "neon_ext<q>")]
5675 )
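
;; Worked example of the scaling above: for V4SImode the unit size is
;; 4 bytes, so lane index 1 becomes "#4" and the output is
;;   ext v0.16b, v1.16b, v2.16b, #4
;; i.e. lanes 1-3 of operand 1 followed by lane 0 of operand 2 (in GCC
;; vector extension index order).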
5676
5677 ;; This instruction's pattern is generated directly by
5678 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5679 ;; need corresponding changes there.
5680 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5681 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5682 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5683 REVERSE))]
5684 "TARGET_SIMD"
5685 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5686 [(set_attr "type" "neon_rev<q>")]
5687 )
5688
5689 (define_insn "aarch64_st2<mode>_dreg"
5690 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5691 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5692 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5693 UNSPEC_ST2))]
5694 "TARGET_SIMD"
5695 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5696 [(set_attr "type" "neon_store2_2reg")]
5697 )
5698
5699 (define_insn "aarch64_st2<mode>_dreg"
5700 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5701 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5702 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5703 UNSPEC_ST2))]
5704 "TARGET_SIMD"
5705 "st1\\t{%S1.1d - %T1.1d}, %0"
5706 [(set_attr "type" "neon_store1_2reg")]
5707 )
5708
5709 (define_insn "aarch64_st3<mode>_dreg"
5710 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5711 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5712 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5713 UNSPEC_ST3))]
5714 "TARGET_SIMD"
5715 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5716 [(set_attr "type" "neon_store3_3reg")]
5717 )
5718
5719 (define_insn "aarch64_st3<mode>_dreg"
5720 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5721 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5722 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5723 UNSPEC_ST3))]
5724 "TARGET_SIMD"
5725 "st1\\t{%S1.1d - %U1.1d}, %0"
5726 [(set_attr "type" "neon_store1_3reg")]
5727 )
5728
5729 (define_insn "aarch64_st4<mode>_dreg"
5730 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5731 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5732 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5733 UNSPEC_ST4))]
5734 "TARGET_SIMD"
5735 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5736 [(set_attr "type" "neon_store4_4reg")]
5737 )
5738
5739 (define_insn "aarch64_st4<mode>_dreg"
5740 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5741 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5742 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5743 UNSPEC_ST4))]
5744 "TARGET_SIMD"
5745 "st1\\t{%S1.1d - %V1.1d}, %0"
5746 [(set_attr "type" "neon_store1_4reg")]
5747 )
5748
5749 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5750 [(match_operand:DI 0 "register_operand" "r")
5751 (match_operand:VSTRUCT 1 "register_operand" "w")
5752 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5753 "TARGET_SIMD"
5754 {
5755 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5756 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5757
5758 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5759 DONE;
5760 })
5761
5762 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5763 [(match_operand:DI 0 "register_operand" "r")
5764 (match_operand:VSTRUCT 1 "register_operand" "w")
5765 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5766 "TARGET_SIMD"
5767 {
5768 machine_mode mode = <VSTRUCT:MODE>mode;
5769 rtx mem = gen_rtx_MEM (mode, operands[0]);
5770
5771 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5772 DONE;
5773 })
5774
5775 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5776 [(match_operand:DI 0 "register_operand" "r")
5777 (match_operand:VSTRUCT 1 "register_operand" "w")
5778 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5779 (match_operand:SI 2 "immediate_operand")]
5780 "TARGET_SIMD"
5781 {
5782 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5783 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5784 * <VSTRUCT:nregs>);
5785
5786 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5787 mem, operands[1], operands[2]));
5788 DONE;
5789 })
5790
5791 (define_expand "aarch64_st1<VALL_F16:mode>"
5792 [(match_operand:DI 0 "register_operand")
5793 (match_operand:VALL_F16 1 "register_operand")]
5794 "TARGET_SIMD"
5795 {
5796 machine_mode mode = <VALL_F16:MODE>mode;
5797 rtx mem = gen_rtx_MEM (mode, operands[0]);
5798
5799 if (BYTES_BIG_ENDIAN)
5800 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5801 else
5802 emit_move_insn (mem, operands[1]);
5803 DONE;
5804 })
5805
5806 ;; Expander for builtins to insert vector registers into large
5807 ;; opaque integer modes.
5808
5809 ;; Q-register list. We don't need a D-reg inserter, as we zero-extend
5810 ;; D-regs in arm_neon.h and insert the resulting Q-regs.
5811
5812 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5813 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5814 (match_operand:VSTRUCT 1 "register_operand" "0")
5815 (match_operand:VQ 2 "register_operand" "w")
5816 (match_operand:SI 3 "immediate_operand" "i")]
5817 "TARGET_SIMD"
5818 {
5819 int part = INTVAL (operands[3]);
5820 int offset = part * 16;
5821
5822 emit_move_insn (operands[0], operands[1]);
5823 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5824 operands[2]);
5825 DONE;
5826 })
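
;; Offset sketch, mirroring the getters earlier: inserting a V4SI value
;; as element 1 of an XImode list copies the whole list first, then
;; writes (subreg:V4SI (reg:XI ...) 16), leaving the other three quad
;; registers untouched.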
5827
5828 ;; Standard pattern name vec_init<mode><Vel>.
5829
5830 (define_expand "vec_init<mode><Vel>"
5831 [(match_operand:VALL_F16 0 "register_operand" "")
5832 (match_operand 1 "" "")]
5833 "TARGET_SIMD"
5834 {
5835 aarch64_expand_vector_init (operands[0], operands[1]);
5836 DONE;
5837 })
5838
5839 (define_insn "*aarch64_simd_ld1r<mode>"
5840 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5841 (vec_duplicate:VALL_F16
5842 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5843 "TARGET_SIMD"
5844 "ld1r\\t{%0.<Vtype>}, %1"
5845 [(set_attr "type" "neon_load1_all_lanes")]
5846 )
5847
5848 (define_insn "aarch64_simd_ld1<mode>_x2"
5849 [(set (match_operand:OI 0 "register_operand" "=w")
5850 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5851 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5852 UNSPEC_LD1))]
5853 "TARGET_SIMD"
5854 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5855 [(set_attr "type" "neon_load1_2reg<q>")]
5856 )
5857
5858 (define_insn "aarch64_simd_ld1<mode>_x2"
5859 [(set (match_operand:OI 0 "register_operand" "=w")
5860 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5861 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5862 UNSPEC_LD1))]
5863 "TARGET_SIMD"
5864 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5865 [(set_attr "type" "neon_load1_2reg<q>")]
5866 )
5867
5868
5869 (define_insn "aarch64_frecpe<mode>"
5870 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5871 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5872 UNSPEC_FRECPE))]
5873 "TARGET_SIMD"
5874 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5875 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5876 )
5877
5878 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5879 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5880 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5881 FRECP))]
5882 "TARGET_SIMD"
5883 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5884 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5885 )
5886
5887 (define_insn "aarch64_frecps<mode>"
5888 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5889 (unspec:VHSDF_HSDF
5890 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5891 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5892 UNSPEC_FRECPS))]
5893 "TARGET_SIMD"
5894 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5895 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5896 )
5897
5898 (define_insn "aarch64_urecpe<mode>"
5899 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5900 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5901 UNSPEC_URECPE))]
5902 "TARGET_SIMD"
5903 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5904 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5905
5906 ;; Standard pattern name vec_extract<mode><Vel>.
5907
5908 (define_expand "vec_extract<mode><Vel>"
5909 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5910 (match_operand:VALL_F16 1 "register_operand" "")
5911 (match_operand:SI 2 "immediate_operand" "")]
5912 "TARGET_SIMD"
5913 {
5914 emit_insn
5915 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5916 DONE;
5917 })
5918
5919 ;; aes
5920
5921 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5922 [(set (match_operand:V16QI 0 "register_operand" "=w")
5923 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5924 (match_operand:V16QI 2 "register_operand" "w")]
5925 CRYPTO_AES))]
5926 "TARGET_SIMD && TARGET_AES"
5927 "aes<aes_op>\\t%0.16b, %2.16b"
5928 [(set_attr "type" "crypto_aese")]
5929 )
5930
5931 ;; When AES/AESMC fusion is enabled we want the register allocation to
5932 ;; look like:
5933 ;; AESE Vn, _
5934 ;; AESMC Vn, Vn
5935 ;; So prefer to tie operand 1 to operand 0 when fusing.
5936
5937 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5938 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5939 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5940 CRYPTO_AESMC))]
5941 "TARGET_SIMD && TARGET_AES"
5942 "aes<aesmc_op>\\t%0.16b, %1.16b"
5943 [(set_attr "type" "crypto_aesmc")
5944 (set_attr_alternative "enabled"
5945 [(if_then_else (match_test
5946 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5947 (const_string "yes" )
5948 (const_string "no"))
5949 (const_string "yes")])]
5950 )
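
;; A hedged source-level sketch of the pair the fusion targets
;; (register choices are illustrative):
;;   #include <arm_neon.h>
;;   uint8x16_t round (uint8x16_t st, uint8x16_t key)
;;   {
;;     st = vaeseq_u8 (st, key);  /* aese  v0.16b, v1.16b */
;;     return vaesmcq_u8 (st);    /* aesmc v0.16b, v0.16b */
;;   }
;; When AARCH64_FUSE_AES_AESMC is enabled the tied "0" alternative
;; becomes available and, being listed first, is preferred, so AESMC
;; reuses AESE's output register and the pair can fuse.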
5951
5952 ;; sha1
5953
5954 (define_insn "aarch64_crypto_sha1hsi"
5955 [(set (match_operand:SI 0 "register_operand" "=w")
5956 (unspec:SI [(match_operand:SI 1
5957 "register_operand" "w")]
5958 UNSPEC_SHA1H))]
5959 "TARGET_SIMD && TARGET_SHA2"
5960 "sha1h\\t%s0, %s1"
5961 [(set_attr "type" "crypto_sha1_fast")]
5962 )
5963
5964 (define_insn "aarch64_crypto_sha1hv4si"
5965 [(set (match_operand:SI 0 "register_operand" "=w")
5966 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5967 (parallel [(const_int 0)]))]
5968 UNSPEC_SHA1H))]
5969 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5970 "sha1h\\t%s0, %s1"
5971 [(set_attr "type" "crypto_sha1_fast")]
5972 )
5973
5974 (define_insn "aarch64_be_crypto_sha1hv4si"
5975 [(set (match_operand:SI 0 "register_operand" "=w")
5976 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5977 (parallel [(const_int 3)]))]
5978 UNSPEC_SHA1H))]
5979 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5980 "sha1h\\t%s0, %s1"
5981 [(set_attr "type" "crypto_sha1_fast")]
5982 )
5983
5984 (define_insn "aarch64_crypto_sha1su1v4si"
5985 [(set (match_operand:V4SI 0 "register_operand" "=w")
5986 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5987 (match_operand:V4SI 2 "register_operand" "w")]
5988 UNSPEC_SHA1SU1))]
5989 "TARGET_SIMD && TARGET_SHA2"
5990 "sha1su1\\t%0.4s, %2.4s"
5991 [(set_attr "type" "crypto_sha1_fast")]
5992 )
5993
5994 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5995 [(set (match_operand:V4SI 0 "register_operand" "=w")
5996 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5997 (match_operand:SI 2 "register_operand" "w")
5998 (match_operand:V4SI 3 "register_operand" "w")]
5999 CRYPTO_SHA1))]
6000 "TARGET_SIMD && TARGET_SHA2"
6001 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6002 [(set_attr "type" "crypto_sha1_slow")]
6003 )
6004
6005 (define_insn "aarch64_crypto_sha1su0v4si"
6006 [(set (match_operand:V4SI 0 "register_operand" "=w")
6007 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6008 (match_operand:V4SI 2 "register_operand" "w")
6009 (match_operand:V4SI 3 "register_operand" "w")]
6010 UNSPEC_SHA1SU0))]
6011 "TARGET_SIMD && TARGET_SHA2"
6012 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6013 [(set_attr "type" "crypto_sha1_xor")]
6014 )
6015
6016 ;; sha256
6017
6018 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6019 [(set (match_operand:V4SI 0 "register_operand" "=w")
6020 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6021 (match_operand:V4SI 2 "register_operand" "w")
6022 (match_operand:V4SI 3 "register_operand" "w")]
6023 CRYPTO_SHA256))]
6024 "TARGET_SIMD && TARGET_SHA2"
6025 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6026 [(set_attr "type" "crypto_sha256_slow")]
6027 )
6028
6029 (define_insn "aarch64_crypto_sha256su0v4si"
6030 [(set (match_operand:V4SI 0 "register_operand" "=w")
6031 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6032 (match_operand:V4SI 2 "register_operand" "w")]
6033 UNSPEC_SHA256SU0))]
6034 "TARGET_SIMD && TARGET_SHA2"
6035 "sha256su0\\t%0.4s, %2.4s"
6036 [(set_attr "type" "crypto_sha256_fast")]
6037 )
6038
6039 (define_insn "aarch64_crypto_sha256su1v4si"
6040 [(set (match_operand:V4SI 0 "register_operand" "=w")
6041 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6042 (match_operand:V4SI 2 "register_operand" "w")
6043 (match_operand:V4SI 3 "register_operand" "w")]
6044 UNSPEC_SHA256SU1))]
6045 "TARGET_SIMD && TARGET_SHA2"
6046 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6047 [(set_attr "type" "crypto_sha256_slow")]
6048 )
6049
6050 ;; sha512
6051
6052 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6053 [(set (match_operand:V2DI 0 "register_operand" "=w")
6054 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6055 (match_operand:V2DI 2 "register_operand" "w")
6056 (match_operand:V2DI 3 "register_operand" "w")]
6057 CRYPTO_SHA512))]
6058 "TARGET_SIMD && TARGET_SHA3"
6059 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6060 [(set_attr "type" "crypto_sha512")]
6061 )
6062
6063 (define_insn "aarch64_crypto_sha512su0qv2di"
6064 [(set (match_operand:V2DI 0 "register_operand" "=w")
6065 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6066 (match_operand:V2DI 2 "register_operand" "w")]
6067 UNSPEC_SHA512SU0))]
6068 "TARGET_SIMD && TARGET_SHA3"
6069 "sha512su0\\t%0.2d, %2.2d"
6070 [(set_attr "type" "crypto_sha512")]
6071 )
6072
6073 (define_insn "aarch64_crypto_sha512su1qv2di"
6074 [(set (match_operand:V2DI 0 "register_operand" "=w")
6075 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6076 (match_operand:V2DI 2 "register_operand" "w")
6077 (match_operand:V2DI 3 "register_operand" "w")]
6078 UNSPEC_SHA512SU1))]
6079 "TARGET_SIMD && TARGET_SHA3"
6080 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6081 [(set_attr "type" "crypto_sha512")]
6082 )
6083
6084 ;; sha3
6085
6086 (define_insn "aarch64_eor3qv8hi"
6087 [(set (match_operand:V8HI 0 "register_operand" "=w")
6088 (xor:V8HI
6089 (xor:V8HI
6090 (match_operand:V8HI 2 "register_operand" "%w")
6091 (match_operand:V8HI 3 "register_operand" "w"))
6092 (match_operand:V8HI 1 "register_operand" "w")))]
6093 "TARGET_SIMD && TARGET_SHA3"
6094 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6095 [(set_attr "type" "crypto_sha3")]
6096 )
6097
6098 (define_insn "aarch64_rax1qv2di"
6099 [(set (match_operand:V2DI 0 "register_operand" "=w")
6100 (xor:V2DI
6101 (rotate:V2DI
6102 (match_operand:V2DI 2 "register_operand" "w")
6103 (const_int 1))
6104 (match_operand:V2DI 1 "register_operand" "w")))]
6105 "TARGET_SIMD && TARGET_SHA3"
6106 "rax1\\t%0.2d, %1.2d, %2.2d"
6107 [(set_attr "type" "crypto_sha3")]
6108 )
6109
6110 (define_insn "aarch64_xarqv2di"
6111 [(set (match_operand:V2DI 0 "register_operand" "=w")
6112 (rotatert:V2DI
6113 (xor:V2DI
6114 (match_operand:V2DI 1 "register_operand" "%w")
6115 (match_operand:V2DI 2 "register_operand" "w"))
6116 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6117 "TARGET_SIMD && TARGET_SHA3"
6118 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6119 [(set_attr "type" "crypto_sha3")]
6120 )
6121
6122 (define_insn "aarch64_bcaxqv8hi"
6123 [(set (match_operand:V8HI 0 "register_operand" "=w")
6124 (xor:V8HI
6125 (and:V8HI
6126 (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
6127 (match_operand:V8HI 2 "register_operand" "w"))
6128 (match_operand:V8HI 1 "register_operand" "w")))]
6129 "TARGET_SIMD && TARGET_SHA3"
6130 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6131 [(set_attr "type" "crypto_sha3")]
6132 )
6133
6134 ;; SM3
6135
6136 (define_insn "aarch64_sm3ss1qv4si"
6137 [(set (match_operand:V4SI 0 "register_operand" "=w")
6138 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6139 (match_operand:V4SI 2 "register_operand" "w")
6140 (match_operand:V4SI 3 "register_operand" "w")]
6141 UNSPEC_SM3SS1))]
6142 "TARGET_SIMD && TARGET_SM4"
6143 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6144 [(set_attr "type" "crypto_sm3")]
6145 )
6146
6147
6148 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6149 [(set (match_operand:V4SI 0 "register_operand" "=w")
6150 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6151 (match_operand:V4SI 2 "register_operand" "w")
6152 (match_operand:V4SI 3 "register_operand" "w")
6153 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6154 CRYPTO_SM3TT))]
6155 "TARGET_SIMD && TARGET_SM4"
6156 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6157 [(set_attr "type" "crypto_sm3")]
6158 )
6159
6160 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6161 [(set (match_operand:V4SI 0 "register_operand" "=w")
6162 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6163 (match_operand:V4SI 2 "register_operand" "w")
6164 (match_operand:V4SI 3 "register_operand" "w")]
6165 CRYPTO_SM3PART))]
6166 "TARGET_SIMD && TARGET_SM4"
6167 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6168 [(set_attr "type" "crypto_sm3")]
6169 )
6170
6171 ;; SM4
6172
6173 (define_insn "aarch64_sm4eqv4si"
6174 [(set (match_operand:V4SI 0 "register_operand" "=w")
6175 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6176 (match_operand:V4SI 2 "register_operand" "w")]
6177 UNSPEC_SM4E))]
6178 "TARGET_SIMD && TARGET_SM4"
6179 "sm4e\\t%0.4s, %2.4s"
6180 [(set_attr "type" "crypto_sm4")]
6181 )
6182
6183 (define_insn "aarch64_sm4ekeyqv4si"
6184 [(set (match_operand:V4SI 0 "register_operand" "=w")
6185 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6186 (match_operand:V4SI 2 "register_operand" "w")]
6187 UNSPEC_SM4EKEY))]
6188 "TARGET_SIMD && TARGET_SM4"
6189 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6190 [(set_attr "type" "crypto_sm4")]
6191 )
6192
6193 ;; fp16fml
6194
6195 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6196 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6197 (unspec:VDQSF
6198 [(match_operand:VDQSF 1 "register_operand" "0")
6199 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6200 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6201 VFMLA16_LOW))]
6202 "TARGET_F16FML"
6203 {
6204 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6205 <nunits> * 2, false);
6206 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6207 <nunits> * 2, false);
6208
6209 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6210 operands[1],
6211 operands[2],
6212 operands[3],
6213 p1, p2));
6214 DONE;
6215
6216 })
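
;; Semantics sketch of the low-half split above, for the 64-bit fmlal
;; variant (little-endian lane numbering):
;;   fmlal v0.2s, v1.2h, v2.2h
;; computes, for each lane i in {0, 1},
;;   s0[i] += (float) h1[i] * (float) h2[i]
;; i.e. the low half-precision lanes selected by p1/p2 are widened to
;; single precision and fused into the accumulator.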
6217
6218 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6219 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6220 (unspec:VDQSF
6221 [(match_operand:VDQSF 1 "register_operand" "0")
6222 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6223 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6224 VFMLA16_HIGH))]
6225 "TARGET_F16FML"
6226 {
6227 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6228 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6229
6230 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6231 operands[1],
6232 operands[2],
6233 operands[3],
6234 p1, p2));
6235 DONE;
6236 })
6237
6238 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6239 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6240 (fma:VDQSF
6241 (float_extend:VDQSF
6242 (vec_select:<VFMLA_SEL_W>
6243 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6244 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6245 (float_extend:VDQSF
6246 (vec_select:<VFMLA_SEL_W>
6247 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6248 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6249 (match_operand:VDQSF 1 "register_operand" "0")))]
6250 "TARGET_F16FML"
6251 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6252 [(set_attr "type" "neon_fp_mul_s")]
6253 )
6254
6255 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6256 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6257 (fma:VDQSF
6258 (float_extend:VDQSF
6259 (neg:<VFMLA_SEL_W>
6260 (vec_select:<VFMLA_SEL_W>
6261 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6262 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6263 (float_extend:VDQSF
6264 (vec_select:<VFMLA_SEL_W>
6265 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6266 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6267 (match_operand:VDQSF 1 "register_operand" "0")))]
6268 "TARGET_F16FML"
6269 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6270 [(set_attr "type" "neon_fp_mul_s")]
6271 )
6272
6273 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6274 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6275 (fma:VDQSF
6276 (float_extend:VDQSF
6277 (vec_select:<VFMLA_SEL_W>
6278 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6279 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6280 (float_extend:VDQSF
6281 (vec_select:<VFMLA_SEL_W>
6282 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6283 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6284 (match_operand:VDQSF 1 "register_operand" "0")))]
6285 "TARGET_F16FML"
6286 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6287 [(set_attr "type" "neon_fp_mul_s")]
6288 )
6289
6290 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6291 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6292 (fma:VDQSF
6293 (float_extend:VDQSF
6294 (neg:<VFMLA_SEL_W>
6295 (vec_select:<VFMLA_SEL_W>
6296 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6297 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6298 (float_extend:VDQSF
6299 (vec_select:<VFMLA_SEL_W>
6300 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6301 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6302 (match_operand:VDQSF 1 "register_operand" "0")))]
6303 "TARGET_F16FML"
6304 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6305 [(set_attr "type" "neon_fp_mul_s")]
6306 )
6307
6308 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6309 [(set (match_operand:V2SF 0 "register_operand" "")
6310 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6311 (match_operand:V4HF 2 "register_operand" "")
6312 (match_operand:V4HF 3 "register_operand" "")
6313 (match_operand:SI 4 "aarch64_imm2" "")]
6314 VFMLA16_LOW))]
6315 "TARGET_F16FML"
6316 {
6317 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6318 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6319
6320 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6321 operands[1],
6322 operands[2],
6323 operands[3],
6324 p1, lane));
6325 DONE;
6326 }
6327 )
6328
6329 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6330 [(set (match_operand:V2SF 0 "register_operand" "")
6331 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6332 (match_operand:V4HF 2 "register_operand" "")
6333 (match_operand:V4HF 3 "register_operand" "")
6334 (match_operand:SI 4 "aarch64_imm2" "")]
6335 VFMLA16_HIGH))]
6336 "TARGET_F16FML"
6337 {
6338 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6339 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6340
6341 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6342 operands[1],
6343 operands[2],
6344 operands[3],
6345 p1, lane));
6346 DONE;
6347 })
6348
6349 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6350 [(set (match_operand:V2SF 0 "register_operand" "=w")
6351 (fma:V2SF
6352 (float_extend:V2SF
6353 (vec_select:V2HF
6354 (match_operand:V4HF 2 "register_operand" "w")
6355 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6356 (float_extend:V2SF
6357 (vec_duplicate:V2HF
6358 (vec_select:HF
6359 (match_operand:V4HF 3 "register_operand" "x")
6360 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6361 (match_operand:V2SF 1 "register_operand" "0")))]
6362 "TARGET_F16FML"
6363 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6364 [(set_attr "type" "neon_fp_mul_s")]
6365 )
6366
6367 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6368 [(set (match_operand:V2SF 0 "register_operand" "=w")
6369 (fma:V2SF
6370 (float_extend:V2SF
6371 (neg:V2HF
6372 (vec_select:V2HF
6373 (match_operand:V4HF 2 "register_operand" "w")
6374 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6375 (float_extend:V2SF
6376 (vec_duplicate:V2HF
6377 (vec_select:HF
6378 (match_operand:V4HF 3 "register_operand" "x")
6379 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6380 (match_operand:V2SF 1 "register_operand" "0")))]
6381 "TARGET_F16FML"
6382 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6383 [(set_attr "type" "neon_fp_mul_s")]
6384 )
6385
6386 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6387 [(set (match_operand:V2SF 0 "register_operand" "=w")
6388 (fma:V2SF
6389 (float_extend:V2SF
6390 (vec_select:V2HF
6391 (match_operand:V4HF 2 "register_operand" "w")
6392 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6393 (float_extend:V2SF
6394 (vec_duplicate:V2HF
6395 (vec_select:HF
6396 (match_operand:V4HF 3 "register_operand" "x")
6397 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6398 (match_operand:V2SF 1 "register_operand" "0")))]
6399 "TARGET_F16FML"
6400 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6401 [(set_attr "type" "neon_fp_mul_s")]
6402 )
6403
6404 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6405 [(set (match_operand:V2SF 0 "register_operand" "=w")
6406 (fma:V2SF
6407 (float_extend:V2SF
6408 (neg:V2HF
6409 (vec_select:V2HF
6410 (match_operand:V4HF 2 "register_operand" "w")
6411 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6412 (float_extend:V2SF
6413 (vec_duplicate:V2HF
6414 (vec_select:HF
6415 (match_operand:V4HF 3 "register_operand" "x")
6416 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6417 (match_operand:V2SF 1 "register_operand" "0")))]
6418 "TARGET_F16FML"
6419 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6420 [(set_attr "type" "neon_fp_mul_s")]
6421 )
6422
6423 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6424 [(set (match_operand:V4SF 0 "register_operand" "")
6425 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6426 (match_operand:V8HF 2 "register_operand" "")
6427 (match_operand:V8HF 3 "register_operand" "")
6428 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6429 VFMLA16_LOW))]
6430 "TARGET_F16FML"
6431 {
6432 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6433 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6434
6435 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6436 operands[1],
6437 operands[2],
6438 operands[3],
6439 p1, lane));
6440 DONE;
6441 })
6442
6443 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6444 [(set (match_operand:V4SF 0 "register_operand" "")
6445 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6446 (match_operand:V8HF 2 "register_operand" "")
6447 (match_operand:V8HF 3 "register_operand" "")
6448 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6449 VFMLA16_HIGH))]
6450 "TARGET_F16FML"
6451 {
6452 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6453 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6454
6455 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6456 operands[1],
6457 operands[2],
6458 operands[3],
6459 p1, lane));
6460 DONE;
6461 })
6462
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

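;; The V2SF variants below implement the 64-bit forms: the '_low' insns
;; widen the bottom two halfwords of a V4HF multiplicand and the '_high'
;; ones (fmlal2/fmlsl2) the top two, while the lane is still taken from a
;; full V8HF register (index 0-7).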
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

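;; The '_lane_' patterns differ from the '_laneq_' ones above only in the
;; lane operand: the index is taken from a 64-bit V4HF register, so the
;; immediate must fit in two bits (aarch64_imm2/"Ui2" instead of
;; aarch64_lane_imm3/"Ui7").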
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

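;; PMULL is a carry-less (polynomial, GF(2)) 64x64->128-bit multiply:
;; partial products are combined with XOR rather than addition, so e.g.
;; 0b11 * 0b11 = 0b101.  "pmull" multiplies the low doubleword element of
;; each source and "pmull2" the high one.  These are the patterns that the
;; vmull_p64/vmull_high_p64 intrinsics would be expected to map to; the
;; operation is the building block of carry-less-multiply kernels such as
;; GHASH and CRC.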
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)