;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
        (match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
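
;; For illustration, storing a zero vector can take the exception above
;; and avoid materializing the zero in a SIMD register.  A minimal sketch
;; (hypothetical function name; exact codegen depends on tuning):
;;
;;   #include <arm_neon.h>
;;   void clear_vec (int32x4_t *p)
;;   {
;;     *p = vdupq_n_s32 (0);   /* may assemble to: stp xzr, xzr, [x0]  */
;;   }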

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
          "=w, m,  m,  w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
          "m,  Dz, w,  w,  w,  r,  r,  Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "ldr\t%d0, %1";
    case 1: return "str\txzr, %0";
    case 2: return "str\t%d1, %0";
    case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4: return "umov\t%0, %1.d[0]";
    case 5: return "fmov\t%d0, %1";
    case 6: return "mov\t%0, %1";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 64);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
          "=w, Umq, m,  w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
          "m,  Dz,  w,  w,  w,  r,  r,  Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
        (match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
        (match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }
    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
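
;; A scalar sketch of the identity used above: ctz (x) == clz (bitrev (x)),
;; where the full bit reversal is built from a byte reversal (REV, via the
;; bswap pattern) followed by a per-byte bit reversal (RBIT).  Hypothetical
;; helper; it assumes x != 0, since __builtin_clz (0) is undefined (the
;; vector CLZ instruction itself is well defined for zero):
;;
;;   #include <stdint.h>
;;   int ctz32 (uint32_t x)
;;   {
;;     x = __builtin_bswap32 (x);                             /* REV     */
;;     x = (x & 0xF0F0F0F0u) >> 4 | (x & 0x0F0F0F0Fu) << 4;   /* RBIT... */
;;     x = (x & 0xCCCCCCCCu) >> 2 | (x & 0x33333333u) << 2;
;;     x = (x & 0xAAAAAAAAu) >> 1 | (x & 0x55555555u) << 1;   /* per byte */
;;     return __builtin_clz (x);                              /* CLZ     */
;;   }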

(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
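
;; Scalar sketch of what the expansion above computes: flip the sign of
;; operand 1 wherever operand 2 is negative, i.e. x ^ (y & sign-bit mask).
;; Hypothetical helper for the 32-bit element case:
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float xorsignf (float x, float y)
;;   {
;;     uint32_t xi, yi;
;;     memcpy (&xi, &x, sizeof xi);
;;     memcpy (&yi, &y, sizeof yi);
;;     xi ^= yi & 0x80000000u;     /* HOST_WIDE_INT_M1U << 31, per lane */
;;     memcpy (&x, &xi, sizeof x);
;;     return x;
;;   }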

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                  DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                  DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
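
;; The same operation is reachable from C through the ACLE intrinsics
;; (with the dotprod extension enabled); a minimal sketch, assuming a
;; GCC 8-era arm_neon.h:
;;
;;   #include <arm_neon.h>
;;   /* Each 32-bit lane i of r accumulates the four-byte dot product
;;      a[4i]*b[4i] + ... + a[4i+3]*b[4i+3].  */
;;   int32x4_t dot_accum (int32x4_t r, int8x16_t a, int8x16_t b)
;;   {
;;     return vdotq_s32 (r, a, b);   /* a single sdot instruction */
;;   }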

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                  DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                  DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)
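
;; The BSL above selects the sign bit from operand 2 and the remaining
;; bits from operand 1.  A scalar sketch of the same bit-level selection
;; (hypothetical helper, 32-bit elements):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float copysignf_sketch (float x, float y)
;;   {
;;     uint32_t xi, yi, m = 0x80000000u;   /* the v_bitmask, per lane   */
;;     memcpy (&xi, &x, sizeof xi);
;;     memcpy (&yi, &y, sizeof yi);
;;     xi = (yi & m) | (xi & ~m);          /* sign from y, rest from x  */
;;     memcpy (&x, &xi, sizeof x);
;;     return x;
;;   }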

(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (abs:VDQ_BHSI (minus:VDQ_BHSI
                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
          (vec_duplicate:VDQ_BHSI
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
          (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
        return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
        gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)
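
;; Both paths of the expander are visible from GNU C vector code; a
;; minimal sketch (the codegen noted is what is expected, not guaranteed):
;;
;;   #include <arm_neon.h>
;;   int32x4_t shl_imm (int32x4_t x)        /* in-range immediate:      */
;;   {                                      /*   shl  v0.4s, v0.4s, #3  */
;;     return x << 3;
;;   }
;;   int32x4_t shl_var (int32x4_t x, int n) /* variable amount:         */
;;   {                                      /*   dup + sshl             */
;;     return x << n;
;;   }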

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:VDQ_I 2 "register_operand" "")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})
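
;; AdvSIMD has no right-shift-by-register instruction, hence the negation
;; above: SSHL with a negative count shifts right.  An equivalent
;; intrinsics sketch (hypothetical function name):
;;
;;   #include <arm_neon.h>
;;   int32x4_t ashr_var (int32x4_t x, int32x4_t n)
;;   {
;;     return vshlq_s32 (x, vnegq_s32 (n));   /* neg + sshl */
;;   }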

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the
       sign bit, just like an asr by 63; however, the standard pattern does
       not handle a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
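
;; Worked example of the remapping: for x = -8 (0xFF...F8), an asr by 63
;; yields -1, i.e. every bit a copy of the sign bit; an asr by 64 is
;; defined by the intrinsic to produce all sign bits as well, so rewriting
;; 64 -> 63 changes nothing observable.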

(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

;; For 64-bit modes we use shl/ushr, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
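
;; For example, on little-endian a 32-bit whole-vector shift of the V4HI
;; value {a, b, c, d} (a in lane 0) is "ushr d0, d1, #32" and produces
;; {c, d, 0, 0}: elements move towards lane 0 and zeros fill the top.
;; On big-endian, lane 0 sits at the most-significant end of the 64-bit
;; register, so the same lane movement needs SHL instead.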

(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "register_operand" "r,w"))
          (match_operand:V2DI 3 "register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
                                             GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_merge:VDQF_F16
          (vec_duplicate:VDQF_F16
            (match_operand:<VEL> 1 "register_operand" "w"))
          (match_operand:VDQF_F16 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                     (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
                                operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; and op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; This means that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand0.
;; For little-endian this is { low(operand0), operand1 }
;; For big-endian this is { operand1, low(operand0) }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQ 0 "register_operand" "")
   (match_operand:<VHALF> 1 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                                                        operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                                                     operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand" "")
   (match_operand:VDN 1 "register_operand" "")
   (match_operand:VDN 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})
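
;; The expander above corresponds roughly to the intrinsics sequence
;; below (a sketch for V4HI -> V8QI; hypothetical function name):
;;
;;   #include <arm_neon.h>
;;   int8x8_t pack (int16x4_t lo, int16x4_t hi)
;;   {
;;     /* vcombine plays the move_lo_quad/move_hi_quad pair; vmovn is
;;        the single xtn of the pattern above.  */
;;     return vmovn_s16 (vcombine_s16 (lo, hi));
;;   }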

;; For quads.

(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
    else
      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
  }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)
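
;; A shift-left-long by zero is how AdvSIMD spells sign/zero extension;
;; the equivalent intrinsics, as a sketch (hypothetical function names,
;; expected codegen noted in comments):
;;
;;   #include <arm_neon.h>
;;   int16x8_t unpack_lo (int8x16_t x)   /* sshll  v0.8h, v0.8b, #0   */
;;   {
;;     return vmovl_s8 (vget_low_s8 (x));
;;   }
;;   int16x8_t unpack_hi (int8x16_t x)   /* sshll2 v0.8h, v0.16b, #0  */
;;   {
;;     return vmovl_s8 (vget_high_s8 (x));
;;   }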

;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
                                  (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)
1689 ;; FP vector operations.
1690 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1691 ;; double-precision (64-bit) floating-point data types and arithmetic as
1692 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1693 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1694 ;;
1695 ;; Floating-point operations can raise an exception. Vectorizing such
1696 ;; operations is safe for the reasons explained below.
1697 ;;
1698 ;; ARMv8 permits an extension to enable trapped floating-point
1699 ;; exception handling; however, this is an optional feature. In the
1700 ;; event of a floating-point exception being raised by vectorised
1701 ;; code then:
1702 ;; 1. If trapped floating-point exceptions are available, then a trap
1703 ;; will be taken when any lane raises an enabled exception. A trap
1704 ;; handler may determine which lane raised the exception.
1705 ;; 2. Alternatively a sticky exception flag is set in the
1706 ;; floating-point status register (FPSR). Software may explicitly
1707 ;; test the exception flags, in which case the tests will either
1708 ;; prevent vectorisation, allowing precise identification of the
1709 ;; failing operation, or occur outside of vectorisable regions, in
1710 ;; which case the specific operation and lane are not of interest.
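;;
;; As an illustrative sketch (not part of the machine description): a
;; plain C loop such as the one below, compiled with -O2 -ftree-vectorize
;; and no fast-math options, may be vectorized using the fadd pattern
;; that follows.
;;
;;   void vadd (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] += b[i];   /* may become: fadd v0.4s, v0.4s, v1.4s */
;;   }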
1711
1712 ;; FP arithmetic operations.
1713
1714 (define_insn "add<mode>3"
1715 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1716 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1717 (match_operand:VHSDF 2 "register_operand" "w")))]
1718 "TARGET_SIMD"
1719 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1720 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1721 )
1722
1723 (define_insn "sub<mode>3"
1724 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1725 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1726 (match_operand:VHSDF 2 "register_operand" "w")))]
1727 "TARGET_SIMD"
1728 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1729 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1730 )
1731
1732 (define_insn "mul<mode>3"
1733 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1734 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1735 (match_operand:VHSDF 2 "register_operand" "w")))]
1736 "TARGET_SIMD"
1737 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1738 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1739 )
1740
1741 (define_expand "div<mode>3"
1742 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1743 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1744 (match_operand:VHSDF 2 "register_operand" "w")))]
1745 "TARGET_SIMD"
1746 {
1747 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1748 DONE;
1749
1750 operands[1] = force_reg (<MODE>mode, operands[1]);
1751 })
1752
1753 (define_insn "*div<mode>3"
1754 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1755 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1756 (match_operand:VHSDF 2 "register_operand" "w")))]
1757 "TARGET_SIMD"
1758 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1759 [(set_attr "type" "neon_fp_div_<stype><q>")]
1760 )
1761
1762 (define_insn "neg<mode>2"
1763 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1764 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1765 "TARGET_SIMD"
1766 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1767 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1768 )
1769
1770 (define_insn "abs<mode>2"
1771 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1772 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1773 "TARGET_SIMD"
1774 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1775 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1776 )
1777
1778 (define_insn "fma<mode>4"
1779 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1780 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1781 (match_operand:VHSDF 2 "register_operand" "w")
1782 (match_operand:VHSDF 3 "register_operand" "0")))]
1783 "TARGET_SIMD"
1784 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1785 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1786 )
1787
1788 (define_insn "*aarch64_fma4_elt<mode>"
1789 [(set (match_operand:VDQF 0 "register_operand" "=w")
1790 (fma:VDQF
1791 (vec_duplicate:VDQF
1792 (vec_select:<VEL>
1793 (match_operand:VDQF 1 "register_operand" "<h_con>")
1794 (parallel [(match_operand:SI 2 "immediate_operand")])))
1795 (match_operand:VDQF 3 "register_operand" "w")
1796 (match_operand:VDQF 4 "register_operand" "0")))]
1797 "TARGET_SIMD"
1798 {
1799 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1800 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1801 }
1802 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1803 )
1804
1805 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1806 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1807 (fma:VDQSF
1808 (vec_duplicate:VDQSF
1809 (vec_select:<VEL>
1810 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1811 (parallel [(match_operand:SI 2 "immediate_operand")])))
1812 (match_operand:VDQSF 3 "register_operand" "w")
1813 (match_operand:VDQSF 4 "register_operand" "0")))]
1814 "TARGET_SIMD"
1815 {
1816 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1817 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1818 }
1819 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1820 )
1821
1822 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1823 [(set (match_operand:VMUL 0 "register_operand" "=w")
1824 (fma:VMUL
1825 (vec_duplicate:VMUL
1826 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1827 (match_operand:VMUL 2 "register_operand" "w")
1828 (match_operand:VMUL 3 "register_operand" "0")))]
1829 "TARGET_SIMD"
1830 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1831 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1832 )
1833
1834 (define_insn "*aarch64_fma4_elt_to_64v2df"
1835 [(set (match_operand:DF 0 "register_operand" "=w")
1836 (fma:DF
1837 (vec_select:DF
1838 (match_operand:V2DF 1 "register_operand" "w")
1839 (parallel [(match_operand:SI 2 "immediate_operand")]))
1840 (match_operand:DF 3 "register_operand" "w")
1841 (match_operand:DF 4 "register_operand" "0")))]
1842 "TARGET_SIMD"
1843 {
1844 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1845 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1846 }
1847 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1848 )
1849
1850 (define_insn "fnma<mode>4"
1851 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1852 (fma:VHSDF
1853 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1854 (match_operand:VHSDF 2 "register_operand" "w")
1855 (match_operand:VHSDF 3 "register_operand" "0")))]
1856 "TARGET_SIMD"
1857 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1858 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1859 )
1860
1861 (define_insn "*aarch64_fnma4_elt<mode>"
1862 [(set (match_operand:VDQF 0 "register_operand" "=w")
1863 (fma:VDQF
1864 (neg:VDQF
1865 (match_operand:VDQF 3 "register_operand" "w"))
1866 (vec_duplicate:VDQF
1867 (vec_select:<VEL>
1868 (match_operand:VDQF 1 "register_operand" "<h_con>")
1869 (parallel [(match_operand:SI 2 "immediate_operand")])))
1870 (match_operand:VDQF 4 "register_operand" "0")))]
1871 "TARGET_SIMD"
1872 {
1873 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1874 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1875 }
1876 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1877 )
1878
1879 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1880 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1881 (fma:VDQSF
1882 (neg:VDQSF
1883 (match_operand:VDQSF 3 "register_operand" "w"))
1884 (vec_duplicate:VDQSF
1885 (vec_select:<VEL>
1886 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1887 (parallel [(match_operand:SI 2 "immediate_operand")])))
1888 (match_operand:VDQSF 4 "register_operand" "0")))]
1889 "TARGET_SIMD"
1890 {
1891 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1892 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1893 }
1894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1895 )
1896
1897 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1898 [(set (match_operand:VMUL 0 "register_operand" "=w")
1899 (fma:VMUL
1900 (neg:VMUL
1901 (match_operand:VMUL 2 "register_operand" "w"))
1902 (vec_duplicate:VMUL
1903 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1904 (match_operand:VMUL 3 "register_operand" "0")))]
1905 "TARGET_SIMD"
1906 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1907 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1908 )
1909
1910 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1911 [(set (match_operand:DF 0 "register_operand" "=w")
1912 (fma:DF
1913 (vec_select:DF
1914 (match_operand:V2DF 1 "register_operand" "w")
1915 (parallel [(match_operand:SI 2 "immediate_operand")]))
1916 (neg:DF
1917 (match_operand:DF 3 "register_operand" "w"))
1918 (match_operand:DF 4 "register_operand" "0")))]
1919 "TARGET_SIMD"
1920 {
1921 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1922 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1923 }
1924 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1925 )
1926
1927 ;; Vector versions of the floating-point frint patterns.
1928 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1929 (define_insn "<frint_pattern><mode>2"
1930 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1931 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1932 FRINT))]
1933 "TARGET_SIMD"
1934 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1935 [(set_attr "type" "neon_fp_round_<stype><q>")]
1936 )
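
;; For instance (an illustrative sketch, not normative): with
;; vectorization enabled, a loop over floorf may expand through the
;; floor<mode>2 standard name above and emit frintm.
;;
;;   #include <math.h>
;;   void vfloor (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = floorf (b[i]);   /* may become: frintm v0.4s, v1.4s */
;;   }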
1937
1938 ;; Vector versions of the fcvt standard patterns.
1939 ;; Expands to lbtrunc, lround, lceil, lfloor
1940 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1941 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1942 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1943 [(match_operand:VHSDF 1 "register_operand" "w")]
1944 FCVT)))]
1945 "TARGET_SIMD"
1946 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1947 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1948 )
1949
1950 ;; HF Scalar variants of related SIMD instructions.
1951 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1952 [(set (match_operand:HI 0 "register_operand" "=w")
1953 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1954 FCVT)))]
1955 "TARGET_SIMD_F16INST"
1956 "fcvt<frint_suffix><su>\t%h0, %h1"
1957 [(set_attr "type" "neon_fp_to_int_s")]
1958 )
1959
1960 (define_insn "<optab>_trunchfhi2"
1961 [(set (match_operand:HI 0 "register_operand" "=w")
1962 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1963 "TARGET_SIMD_F16INST"
1964 "fcvtz<su>\t%h0, %h1"
1965 [(set_attr "type" "neon_fp_to_int_s")]
1966 )
1967
1968 (define_insn "<optab>hihf2"
1969 [(set (match_operand:HF 0 "register_operand" "=w")
1970 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1971 "TARGET_SIMD_F16INST"
1972 "<su_optab>cvtf\t%h0, %h1"
1973 [(set_attr "type" "neon_int_to_fp_s")]
1974 )
1975
1976 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1977 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1978 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1979 [(mult:VDQF
1980 (match_operand:VDQF 1 "register_operand" "w")
1981 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1982 UNSPEC_FRINTZ)))]
1983 "TARGET_SIMD
1984 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1985 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1986 {
1987 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1988 char buf[64];
1989 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1990 output_asm_insn (buf, operands);
1991 return "";
1992 }
1993 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1994 )
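
;; A hedged example of the multiply-by-power-of-two fusion above: the
;; scale 16.0f is 2**4, so fbits is 4 and the multiply folds into the
;; immediate of the fcvtzs.
;;
;;   void to_q4 (int *restrict d, const float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (int) (s[i] * 16.0f);   /* may become: fcvtzs v0.4s, v1.4s, #4 */
;;   }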
1995
1996 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1997 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1998 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1999 [(match_operand:VHSDF 1 "register_operand")]
2000 UNSPEC_FRINTZ)))]
2001 "TARGET_SIMD"
2002 {})
2003
2004 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2005 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2006 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2007 [(match_operand:VHSDF 1 "register_operand")]
2008 UNSPEC_FRINTZ)))]
2009 "TARGET_SIMD"
2010 {})
2011
2012 (define_expand "ftrunc<VHSDF:mode>2"
2013 [(set (match_operand:VHSDF 0 "register_operand")
2014 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2015 UNSPEC_FRINTZ))]
2016 "TARGET_SIMD"
2017 {})
2018
2019 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2020 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2021 (FLOATUORS:VHSDF
2022 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2023 "TARGET_SIMD"
2024 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2025 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2026 )
2027
2028 ;; Conversions between vectors of floats and doubles.
2029 ;; Contains a mix of patterns to match standard pattern names
2030 ;; and those for intrinsics.
2031
2032 ;; Float widening operations.
2033
2034 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2035 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2036 (float_extend:<VWIDE> (vec_select:<VHALF>
2037 (match_operand:VQ_HSF 1 "register_operand" "w")
2038 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2039 )))]
2040 "TARGET_SIMD"
2041 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2042 [(set_attr "type" "neon_fp_cvt_widen_s")]
2043 )
2044
2045 ;; Convert between fixed-point and floating-point (vector modes)
2046
2047 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2048 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2049 (unspec:<VHSDF:FCVT_TARGET>
2050 [(match_operand:VHSDF 1 "register_operand" "w")
2051 (match_operand:SI 2 "immediate_operand" "i")]
2052 FCVT_F2FIXED))]
2053 "TARGET_SIMD"
2054 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2055 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2056 )
2057
2058 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2059 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2060 (unspec:<VDQ_HSDI:FCVT_TARGET>
2061 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2062 (match_operand:SI 2 "immediate_operand" "i")]
2063 FCVT_FIXED2F))]
2064 "TARGET_SIMD"
2065 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2066 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2067 )
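
;; These two patterns are reachable from the vcvt*_n_* intrinsics; a
;; minimal sketch (illustrative only) converting Q16.16 fixed-point to
;; float:
;;
;;   #include <arm_neon.h>
;;   float32x4_t from_q16 (int32x4_t x)
;;   {
;;     return vcvtq_n_f32_s32 (x, 16);   /* scvtf v0.4s, v0.4s, #16 */
;;   }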
2068
2069 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2070 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2071 ;; the meaning of HI and LO changes depending on the target endianness.
2072 ;; While elsewhere we map the higher numbered elements of a vector to
2073 ;; the lower architectural lanes of the vector, for these patterns we want
2074 ;; to always treat "hi" as referring to the higher architectural lanes.
2075 ;; Consequently, while the patterns below look inconsistent with our
2076 ;; other big-endian patterns, their behavior is as required.
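;;
;; A concrete reading (illustrative only): for a V8HF source
;;   h = [h0 .. h7]   (architectural lane numbers)
;; vec_unpacks_lo_v8hf converts lanes h0..h3 with fcvtl, while
;; vec_unpacks_hi_v8hf converts lanes h4..h7 with fcvtl2, regardless of
;; BYTES_BIG_ENDIAN.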
2077
2078 (define_expand "vec_unpacks_lo_<mode>"
2079 [(match_operand:<VWIDE> 0 "register_operand" "")
2080 (match_operand:VQ_HSF 1 "register_operand" "")]
2081 "TARGET_SIMD"
2082 {
2083 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2084 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2085 operands[1], p));
2086 DONE;
2087 }
2088 )
2089
2090 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2091 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2092 (float_extend:<VWIDE> (vec_select:<VHALF>
2093 (match_operand:VQ_HSF 1 "register_operand" "w")
2094 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2095 )))]
2096 "TARGET_SIMD"
2097 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2098 [(set_attr "type" "neon_fp_cvt_widen_s")]
2099 )
2100
2101 (define_expand "vec_unpacks_hi_<mode>"
2102 [(match_operand:<VWIDE> 0 "register_operand" "")
2103 (match_operand:VQ_HSF 1 "register_operand" "")]
2104 "TARGET_SIMD"
2105 {
2106 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2107 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2108 operands[1], p));
2109 DONE;
2110 }
2111 )
2112 (define_insn "aarch64_float_extend_lo_<Vwide>"
2113 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2114 (float_extend:<VWIDE>
2115 (match_operand:VDF 1 "register_operand" "w")))]
2116 "TARGET_SIMD"
2117 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2118 [(set_attr "type" "neon_fp_cvt_widen_s")]
2119 )
2120
2121 ;; Float narrowing operations.
2122
2123 (define_insn "aarch64_float_truncate_lo_<mode>"
2124 [(set (match_operand:VDF 0 "register_operand" "=w")
2125 (float_truncate:VDF
2126 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2127 "TARGET_SIMD"
2128 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2129 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2130 )
2131
2132 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2133 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2134 (vec_concat:<VDBL>
2135 (match_operand:VDF 1 "register_operand" "0")
2136 (float_truncate:VDF
2137 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2138 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2139 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2140 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2141 )
2142
2143 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2144 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2145 (vec_concat:<VDBL>
2146 (float_truncate:VDF
2147 (match_operand:<VWIDE> 2 "register_operand" "w"))
2148 (match_operand:VDF 1 "register_operand" "0")))]
2149 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2150 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2151 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2152 )
2153
2154 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2155 [(match_operand:<VDBL> 0 "register_operand" "=w")
2156 (match_operand:VDF 1 "register_operand" "0")
2157 (match_operand:<VWIDE> 2 "register_operand" "w")]
2158 "TARGET_SIMD"
2159 {
2160 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2161 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2162 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2163 emit_insn (gen (operands[0], operands[1], operands[2]));
2164 DONE;
2165 }
2166 )
2167
2168 (define_expand "vec_pack_trunc_v2df"
2169 [(set (match_operand:V4SF 0 "register_operand")
2170 (vec_concat:V4SF
2171 (float_truncate:V2SF
2172 (match_operand:V2DF 1 "register_operand"))
2173 (float_truncate:V2SF
2174 (match_operand:V2DF 2 "register_operand"))
2175 ))]
2176 "TARGET_SIMD"
2177 {
2178 rtx tmp = gen_reg_rtx (V2SFmode);
2179 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2180 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2181
2182 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2183 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2184 tmp, operands[hi]));
2185 DONE;
2186 }
2187 )
2188
2189 (define_expand "vec_pack_trunc_df"
2190 [(set (match_operand:V2SF 0 "register_operand")
2191 (vec_concat:V2SF
2192 (float_truncate:SF
2193 (match_operand:DF 1 "register_operand"))
2194 (float_truncate:SF
2195 (match_operand:DF 2 "register_operand"))
2196 ))]
2197 "TARGET_SIMD"
2198 {
2199 rtx tmp = gen_reg_rtx (V2DFmode);
2200 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2201 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2202
2203 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2204 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2205 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2206 DONE;
2207 }
2208 )
2209
2210 ;; FP Max/Min
2211 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2212 ;; expression like:
2213 ;; a = (b < c) ? b : c;
2214 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2215 ;; either explicitly or indirectly via -ffast-math.
2216 ;;
2217 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2218 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2219 ;; operand will be returned when both operands are zero (i.e. they may not
2220 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2221 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2222 ;; NaNs.
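;;
;; A minimal sketch of the idiom (illustrative only): with -ffast-math,
;; which implies -ffinite-math-only, the ternary below is matched as
;; MIN_EXPR and may vectorize to the fminnm form of the pattern that
;; follows.
;;
;;   void vmin (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = (a[i] < b[i]) ? a[i] : b[i];   /* fminnm v0.4s, v0.4s, v1.4s */
;;   }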
2223
2224 (define_insn "<su><maxmin><mode>3"
2225 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2226 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2227 (match_operand:VHSDF 2 "register_operand" "w")))]
2228 "TARGET_SIMD"
2229 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2230 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2231 )
2232
2233 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2234 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2235 ;; which implement the IEEE fmax ()/fmin () functions.
2236 (define_insn "<maxmin_uns><mode>3"
2237 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2238 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2239 (match_operand:VHSDF 2 "register_operand" "w")]
2240 FMAXMIN_UNS))]
2241 "TARGET_SIMD"
2242 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2243 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2244 )
2245
2246 ;; 'across lanes' add.
2247
2248 (define_expand "reduc_plus_scal_<mode>"
2249 [(match_operand:<VEL> 0 "register_operand" "=w")
2250 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2251 UNSPEC_ADDV)]
2252 "TARGET_SIMD"
2253 {
2254 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2255 rtx scratch = gen_reg_rtx (<MODE>mode);
2256 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2257 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2258 DONE;
2259 }
2260 )
2261
2262 (define_insn "aarch64_faddp<mode>"
2263 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2264 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2265 (match_operand:VHSDF 2 "register_operand" "w")]
2266 UNSPEC_FADDV))]
2267 "TARGET_SIMD"
2268 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2269 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2270 )
2271
2272 (define_insn "aarch64_reduc_plus_internal<mode>"
2273 [(set (match_operand:VDQV 0 "register_operand" "=w")
2274 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2275 UNSPEC_ADDV))]
2276 "TARGET_SIMD"
2277 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2278 [(set_attr "type" "neon_reduc_add<q>")]
2279 )
2280
2281 (define_insn "aarch64_reduc_plus_internalv2si"
2282 [(set (match_operand:V2SI 0 "register_operand" "=w")
2283 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2284 UNSPEC_ADDV))]
2285 "TARGET_SIMD"
2286 "addp\\t%0.2s, %1.2s, %1.2s"
2287 [(set_attr "type" "neon_reduc_add")]
2288 )
2289
2290 (define_insn "reduc_plus_scal_<mode>"
2291 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2292 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2293 UNSPEC_FADDV))]
2294 "TARGET_SIMD"
2295 "faddp\\t%<Vetype>0, %1.<Vtype>"
2296 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2297 )
2298
2299 (define_expand "reduc_plus_scal_v4sf"
2300 [(set (match_operand:SF 0 "register_operand")
2301 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2302 UNSPEC_FADDV))]
2303 "TARGET_SIMD"
2304 {
2305 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2306 rtx scratch = gen_reg_rtx (V4SFmode);
2307 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2308 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2309 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2310 DONE;
2311 })
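
;; Schematically, the V4SF expansion above reduces in two pairwise steps
;; rather than with a single across-lanes instruction (illustrative only):
;;   faddp v1.4s, v0.4s, v0.4s   -> {a0+a1, a2+a3, a0+a1, a2+a3}
;;   faddp v1.4s, v1.4s, v1.4s   -> every lane holds a0+a1+a2+a3
;; followed by an extract of lane 0.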
2312
2313 (define_insn "clrsb<mode>2"
2314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2315 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2316 "TARGET_SIMD"
2317 "cls\\t%0.<Vtype>, %1.<Vtype>"
2318 [(set_attr "type" "neon_cls<q>")]
2319 )
2320
2321 (define_insn "clz<mode>2"
2322 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2323 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2324 "TARGET_SIMD"
2325 "clz\\t%0.<Vtype>, %1.<Vtype>"
2326 [(set_attr "type" "neon_cls<q>")]
2327 )
2328
2329 (define_insn "popcount<mode>2"
2330 [(set (match_operand:VB 0 "register_operand" "=w")
2331 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2332 "TARGET_SIMD"
2333 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2334 [(set_attr "type" "neon_cnt<q>")]
2335 )
2336
2337 ;; 'across lanes' max and min ops.
2338
2339 ;; Template for outputting a scalar, so we can create __builtins which can be
2340 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2341 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2342 [(match_operand:<VEL> 0 "register_operand")
2343 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2344 FMAXMINV)]
2345 "TARGET_SIMD"
2346 {
2347 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2348 rtx scratch = gen_reg_rtx (<MODE>mode);
2349 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2350 operands[1]));
2351 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2352 DONE;
2353 }
2354 )
2355
2356 ;; Likewise for integer cases, signed and unsigned.
2357 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2358 [(match_operand:<VEL> 0 "register_operand")
2359 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2360 MAXMINV)]
2361 "TARGET_SIMD"
2362 {
2363 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2364 rtx scratch = gen_reg_rtx (<MODE>mode);
2365 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2366 operands[1]));
2367 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2368 DONE;
2369 }
2370 )
2371
2372 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2373 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2374 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2375 MAXMINV))]
2376 "TARGET_SIMD"
2377 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2378 [(set_attr "type" "neon_reduc_minmax<q>")]
2379 )
2380
2381 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2382 [(set (match_operand:V2SI 0 "register_operand" "=w")
2383 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2384 MAXMINV))]
2385 "TARGET_SIMD"
2386 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2387 [(set_attr "type" "neon_reduc_minmax")]
2388 )
2389
2390 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2391 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2392 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2393 FMAXMINV))]
2394 "TARGET_SIMD"
2395 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2396 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2397 )
2398
2399 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2400 ;; allocation.
2401 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2402 ;; to select.
2403 ;;
2404 ;; Thus our BSL is of the form:
2405 ;; op0 = bsl (mask, op2, op3)
2406 ;; We can use any of:
2407 ;;
2408 ;; if (op0 = mask)
2409 ;; bsl mask, op1, op2
2410 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2411 ;; bit op0, op2, mask
2412 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2413 ;; bif op0, op1, mask
2414 ;;
2415 ;; The aarch64_simd_bsl<mode> expander expands to this pattern.
2416 ;; Some forms of straight-line code may instead generate the equivalent
2417 ;; form matched by *aarch64_simd_bsl<mode>_alt.
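;;
;; A worked bit-level example (illustrative only): with
;;   mask = 0b11110000, op2 = 0bAAAAAAAA, op3 = 0bBBBBBBBB
;; the selection computes (mask & op2) | (~mask & op3) = 0bAAAABBBB:
;; 1-bits in the mask take bits from op2, 0-bits take bits from op3.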
2418
2419 (define_insn "aarch64_simd_bsl<mode>_internal"
2420 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2421 (xor:VDQ_I
2422 (and:VDQ_I
2423 (xor:VDQ_I
2424 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2425 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2426 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2427 (match_dup:<V_INT_EQUIV> 3)
2428 ))]
2429 "TARGET_SIMD"
2430 "@
2431 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2432 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2433 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2434 [(set_attr "type" "neon_bsl<q>")]
2435 )
2436
2437 ;; We need this form in addition to the above pattern to match the case
2438 ;; when combine tries merging three insns such that the second operand of
2439 ;; the outer XOR matches the second operand of the inner XOR rather than
2440 ;; the first. The two are equivalent but since recog doesn't try all
2441 ;; permutations of commutative operations, we have to have a separate pattern.
2442
2443 (define_insn "*aarch64_simd_bsl<mode>_alt"
2444 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2445 (xor:VDQ_I
2446 (and:VDQ_I
2447 (xor:VDQ_I
2448 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2449 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2450 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2451 (match_dup:<V_INT_EQUIV> 2)))]
2452 "TARGET_SIMD"
2453 "@
2454 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2455 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2456 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2457 [(set_attr "type" "neon_bsl<q>")]
2458 )
2459
2460 ;; DImode is special: we want to avoid computing operations in the
2461 ;; vector registers when they are more naturally computed in general
2462 ;; purpose registers, since doing so means moving all three operands from
2463 ;; general purpose registers to vector registers, then back again. However, we
2464 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2465 ;; optimizations based on the component operations of a BSL.
2466 ;;
2467 ;; That means we need a splitter back to the individual operations, if they
2468 ;; would be better calculated on the integer side.
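;;
;; The xor/and/xor form used by these patterns rests on the identity
;; (a minimal C sketch, illustrative only):
;;
;;   #include <stdint.h>
;;   uint64_t bsl (uint64_t mask, uint64_t a, uint64_t b)
;;   {
;;     return ((a ^ b) & mask) ^ b;   /* == (a & mask) | (b & ~mask) */
;;   }
;;
;; which is exactly the eor/and/eor sequence the splitters below emit on
;; the general-purpose side.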
2469
2470 (define_insn_and_split "aarch64_simd_bsldi_internal"
2471 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2472 (xor:DI
2473 (and:DI
2474 (xor:DI
2475 (match_operand:DI 3 "register_operand" "w,0,w,r")
2476 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2477 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2478 (match_dup:DI 3)
2479 ))]
2480 "TARGET_SIMD"
2481 "@
2482 bsl\\t%0.8b, %2.8b, %3.8b
2483 bit\\t%0.8b, %2.8b, %1.8b
2484 bif\\t%0.8b, %3.8b, %1.8b
2485 #"
2486 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2487 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2488 {
2489 /* Split back to individual operations. If we're before reload, and
2490 able to create a temporary register, do so. If we're after reload,
2491 we've got an early-clobber destination register, so use that.
2492 Otherwise, we can't create pseudos and we can't yet guarantee that
2493 operands[0] is safe to write, so FAIL to split. */
2494
2495 rtx scratch;
2496 if (reload_completed)
2497 scratch = operands[0];
2498 else if (can_create_pseudo_p ())
2499 scratch = gen_reg_rtx (DImode);
2500 else
2501 FAIL;
2502
2503 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2504 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2505 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2506 DONE;
2507 }
2508 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2509 (set_attr "length" "4,4,4,12")]
2510 )
2511
2512 (define_insn_and_split "aarch64_simd_bsldi_alt"
2513 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2514 (xor:DI
2515 (and:DI
2516 (xor:DI
2517 (match_operand:DI 3 "register_operand" "w,w,0,r")
2518 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2519 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2520 (match_dup:DI 2)
2521 ))]
2522 "TARGET_SIMD"
2523 "@
2524 bsl\\t%0.8b, %3.8b, %2.8b
2525 bit\\t%0.8b, %3.8b, %1.8b
2526 bif\\t%0.8b, %2.8b, %1.8b
2527 #"
2528 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2529 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2530 {
2531 /* Split back to individual operations. If we're before reload, and
2532 able to create a temporary register, do so. If we're after reload,
2533 we've got an early-clobber destination register, so use that.
2534 Otherwise, we can't create pseudos and we can't yet guarantee that
2535 operands[0] is safe to write, so FAIL to split. */
2536
2537 rtx scratch;
2538 if (reload_completed)
2539 scratch = operands[0];
2540 else if (can_create_pseudo_p ())
2541 scratch = gen_reg_rtx (DImode);
2542 else
2543 FAIL;
2544
2545 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2546 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2547 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2548 DONE;
2549 }
2550 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2551 (set_attr "length" "4,4,4,12")]
2552 )
2553
2554 (define_expand "aarch64_simd_bsl<mode>"
2555 [(match_operand:VALLDIF 0 "register_operand")
2556 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2557 (match_operand:VALLDIF 2 "register_operand")
2558 (match_operand:VALLDIF 3 "register_operand")]
2559 "TARGET_SIMD"
2560 {
2561 /* We can't alias operands together if they have different modes. */
2562 rtx tmp = operands[0];
2563 if (FLOAT_MODE_P (<MODE>mode))
2564 {
2565 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2566 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2567 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2568 }
2569 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2570 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2571 operands[1],
2572 operands[2],
2573 operands[3]));
2574 if (tmp != operands[0])
2575 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2576
2577 DONE;
2578 })
2579
2580 (define_expand "vcond_mask_<mode><v_int_equiv>"
2581 [(match_operand:VALLDI 0 "register_operand")
2582 (match_operand:VALLDI 1 "nonmemory_operand")
2583 (match_operand:VALLDI 2 "nonmemory_operand")
2584 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2585 "TARGET_SIMD"
2586 {
2587 /* If we have (a = (P) ? -1 : 0), then we can simply move the
2588 generated mask (the result must be int). */
2589 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2590 && operands[2] == CONST0_RTX (<MODE>mode))
2591 emit_move_insn (operands[0], operands[3]);
2592 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2593 else if (operands[1] == CONST0_RTX (<MODE>mode)
2594 && operands[2] == CONSTM1_RTX (<MODE>mode))
2595 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2596 else
2597 {
2598 if (!REG_P (operands[1]))
2599 operands[1] = force_reg (<MODE>mode, operands[1]);
2600 if (!REG_P (operands[2]))
2601 operands[2] = force_reg (<MODE>mode, operands[2]);
2602 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2603 operands[1], operands[2]));
2604 }
2605
2606 DONE;
2607 })
2608
2609 ;; Patterns comparing two vectors to produce a mask.
2610
2611 (define_expand "vec_cmp<mode><mode>"
2612 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2613 (match_operator 1 "comparison_operator"
2614 [(match_operand:VSDQ_I_DI 2 "register_operand")
2615 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2616 "TARGET_SIMD"
2617 {
2618 rtx mask = operands[0];
2619 enum rtx_code code = GET_CODE (operands[1]);
2620
2621 switch (code)
2622 {
2623 case NE:
2624 case LE:
2625 case LT:
2626 case GE:
2627 case GT:
2628 case EQ:
2629 if (operands[3] == CONST0_RTX (<MODE>mode))
2630 break;
2631
2632 /* Fall through. */
2633 default:
2634 if (!REG_P (operands[3]))
2635 operands[3] = force_reg (<MODE>mode, operands[3]);
2636
2637 break;
2638 }
2639
2640 switch (code)
2641 {
2642 case LT:
2643 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2644 break;
2645
2646 case GE:
2647 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2648 break;
2649
2650 case LE:
2651 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2652 break;
2653
2654 case GT:
2655 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2656 break;
2657
2658 case LTU:
2659 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2660 break;
2661
2662 case GEU:
2663 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2664 break;
2665
2666 case LEU:
2667 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2668 break;
2669
2670 case GTU:
2671 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2672 break;
2673
2674 case NE:
2675 /* Handle NE as !EQ. */
2676 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2677 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2678 break;
2679
2680 case EQ:
2681 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2682 break;
2683
2684 default:
2685 gcc_unreachable ();
2686 }
2687
2688 DONE;
2689 })
2690
2691 (define_expand "vec_cmp<mode><v_int_equiv>"
2692 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2693 (match_operator 1 "comparison_operator"
2694 [(match_operand:VDQF 2 "register_operand")
2695 (match_operand:VDQF 3 "nonmemory_operand")]))]
2696 "TARGET_SIMD"
2697 {
2698 int use_zero_form = 0;
2699 enum rtx_code code = GET_CODE (operands[1]);
2700 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2701
2702 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2703
2704 switch (code)
2705 {
2706 case LE:
2707 case LT:
2708 case GE:
2709 case GT:
2710 case EQ:
2711 if (operands[3] == CONST0_RTX (<MODE>mode))
2712 {
2713 use_zero_form = 1;
2714 break;
2715 }
2716 /* Fall through. */
2717 default:
2718 if (!REG_P (operands[3]))
2719 operands[3] = force_reg (<MODE>mode, operands[3]);
2720
2721 break;
2722 }
2723
2724 switch (code)
2725 {
2726 case LT:
2727 if (use_zero_form)
2728 {
2729 comparison = gen_aarch64_cmlt<mode>;
2730 break;
2731 }
2732 /* Fall through. */
2733 case UNLT:
2734 std::swap (operands[2], operands[3]);
2735 /* Fall through. */
2736 case UNGT:
2737 case GT:
2738 comparison = gen_aarch64_cmgt<mode>;
2739 break;
2740 case LE:
2741 if (use_zero_form)
2742 {
2743 comparison = gen_aarch64_cmle<mode>;
2744 break;
2745 }
2746 /* Fall through. */
2747 case UNLE:
2748 std::swap (operands[2], operands[3]);
2749 /* Fall through. */
2750 case UNGE:
2751 case GE:
2752 comparison = gen_aarch64_cmge<mode>;
2753 break;
2754 case NE:
2755 case EQ:
2756 comparison = gen_aarch64_cmeq<mode>;
2757 break;
2758 case UNEQ:
2759 case ORDERED:
2760 case UNORDERED:
2761 case LTGT:
2762 break;
2763 default:
2764 gcc_unreachable ();
2765 }
2766
2767 switch (code)
2768 {
2769 case UNGE:
2770 case UNGT:
2771 case UNLE:
2772 case UNLT:
2773 {
2774 /* All of the above must not raise any FP exceptions. Thus we first
2775 check each operand for NaNs and force any elements containing NaN to
2776 zero before using them in the compare.
2777 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2778 (cm<cc> (isnan (a) ? 0.0 : a,
2779 isnan (b) ? 0.0 : b))
2780 We use the following transformations for doing the comparisons:
2781 a UNGE b -> a GE b
2782 a UNGT b -> a GT b
2783 a UNLE b -> b GE a
2784 a UNLT b -> b GT a. */
2785
2786 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2787 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2788 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2789 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2790 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2791 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2792 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2793 lowpart_subreg (<V_INT_EQUIV>mode,
2794 operands[2],
2795 <MODE>mode)));
2796 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2797 lowpart_subreg (<V_INT_EQUIV>mode,
2798 operands[3],
2799 <MODE>mode)));
2800 gcc_assert (comparison != NULL);
2801 emit_insn (comparison (operands[0],
2802 lowpart_subreg (<MODE>mode,
2803 tmp0, <V_INT_EQUIV>mode),
2804 lowpart_subreg (<MODE>mode,
2805 tmp1, <V_INT_EQUIV>mode)));
2806 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2807 }
2808 break;
2809
2810 case LT:
2811 case LE:
2812 case GT:
2813 case GE:
2814 case EQ:
2815 case NE:
2816 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2817 Since a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
2818 a GE b -> a GE b
2819 a GT b -> a GT b
2820 a LE b -> b GE a
2821 a LT b -> b GT a
2822 a EQ b -> a EQ b
2823 a NE b -> ~(a EQ b) */
2824 gcc_assert (comparison != NULL);
2825 emit_insn (comparison (operands[0], operands[2], operands[3]));
2826 if (code == NE)
2827 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2828 break;
2829
2830 case LTGT:
2831 /* LTGT is not guaranteed not to generate an FP exception, so
2832 take the faster route: ((a > b) || (b > a)). */
2833 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2834 operands[2], operands[3]));
2835 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2836 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2837 break;
2838
2839 case ORDERED:
2840 case UNORDERED:
2841 case UNEQ:
2842 /* cmeq (a, a) & cmeq (b, b). */
2843 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2844 operands[2], operands[2]));
2845 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2846 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2847
2848 if (code == UNORDERED)
2849 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2850 else if (code == UNEQ)
2851 {
2852 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2853 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2854 }
2855 break;
2856
2857 default:
2858 gcc_unreachable ();
2859 }
2860
2861 DONE;
2862 })
2863
2864 (define_expand "vec_cmpu<mode><mode>"
2865 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2866 (match_operator 1 "comparison_operator"
2867 [(match_operand:VSDQ_I_DI 2 "register_operand")
2868 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2869 "TARGET_SIMD"
2870 {
2871 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2872 operands[2], operands[3]));
2873 DONE;
2874 })
2875
2876 (define_expand "vcond<mode><mode>"
2877 [(set (match_operand:VALLDI 0 "register_operand")
2878 (if_then_else:VALLDI
2879 (match_operator 3 "comparison_operator"
2880 [(match_operand:VALLDI 4 "register_operand")
2881 (match_operand:VALLDI 5 "nonmemory_operand")])
2882 (match_operand:VALLDI 1 "nonmemory_operand")
2883 (match_operand:VALLDI 2 "nonmemory_operand")))]
2884 "TARGET_SIMD"
2885 {
2886 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2887 enum rtx_code code = GET_CODE (operands[3]);
2888
2889 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2890 invert it and swap operands 1/2 to avoid the additional
2891 NOT instruction. */
2892 if (code == NE)
2893 {
2894 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2895 operands[4], operands[5]);
2896 std::swap (operands[1], operands[2]);
2897 }
2898 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2899 operands[4], operands[5]));
2900 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2901 operands[2], mask));
2902
2903 DONE;
2904 })
2905
2906 (define_expand "vcond<v_cmp_mixed><mode>"
2907 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2908 (if_then_else:<V_cmp_mixed>
2909 (match_operator 3 "comparison_operator"
2910 [(match_operand:VDQF_COND 4 "register_operand")
2911 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2912 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2913 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2914 "TARGET_SIMD"
2915 {
2916 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2917 enum rtx_code code = GET_CODE (operands[3]);
2918
2919 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2920 invert it and swap operands 1/2 to avoid the additional
2921 NOT instruction. */
2922 if (code == NE)
2923 {
2924 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2925 operands[4], operands[5]);
2926 std::swap (operands[1], operands[2]);
2927 }
2928 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2929 operands[4], operands[5]));
2930 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2931 operands[0], operands[1],
2932 operands[2], mask));
2933
2934 DONE;
2935 })
2936
2937 (define_expand "vcondu<mode><mode>"
2938 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2939 (if_then_else:VSDQ_I_DI
2940 (match_operator 3 "comparison_operator"
2941 [(match_operand:VSDQ_I_DI 4 "register_operand")
2942 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2943 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2944 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2945 "TARGET_SIMD"
2946 {
2947 rtx mask = gen_reg_rtx (<MODE>mode);
2948 enum rtx_code code = GET_CODE (operands[3]);
2949
2950 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2951 invert it and swap operands 1/2 to avoid the additional
2952 NOT instruction. */
2953 if (code == NE)
2954 {
2955 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2956 operands[4], operands[5]);
2957 std::swap (operands[1], operands[2]);
2958 }
2959 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2960 operands[4], operands[5]));
2961 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2962 operands[2], mask));
2963 DONE;
2964 })
2965
2966 (define_expand "vcondu<mode><v_cmp_mixed>"
2967 [(set (match_operand:VDQF 0 "register_operand")
2968 (if_then_else:VDQF
2969 (match_operator 3 "comparison_operator"
2970 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2971 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2972 (match_operand:VDQF 1 "nonmemory_operand")
2973 (match_operand:VDQF 2 "nonmemory_operand")))]
2974 "TARGET_SIMD"
2975 {
2976 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2977 enum rtx_code code = GET_CODE (operands[3]);
2978
2979 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
2980 invert it and swap operands 1/2 to avoid the additional
2981 NOT instruction. */
2982 if (code == NE)
2983 {
2984 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2985 operands[4], operands[5]);
2986 std::swap (operands[1], operands[2]);
2987 }
2988 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2989 mask, operands[3],
2990 operands[4], operands[5]));
2991 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2992 operands[2], mask));
2993 DONE;
2994 })
2995
2996 ;; Patterns for AArch64 SIMD Intrinsics.
2997
2998 ;; Lane extraction with sign extension to general purpose register.
2999 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3000 [(set (match_operand:GPI 0 "register_operand" "=r")
3001 (sign_extend:GPI
3002 (vec_select:<VEL>
3003 (match_operand:VDQQH 1 "register_operand" "w")
3004 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3005 "TARGET_SIMD"
3006 {
3007 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
3008 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3009 }
3010 [(set_attr "type" "neon_to_gp<q>")]
3011 )
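
;; A hedged sketch of source that reaches the pattern above: returning a
;; narrower lane as a wider signed integer lets combine fold the lane
;; extract and the sign extension into a single smov.
;;
;;   #include <arm_neon.h>
;;   int32_t lane2 (int16x8_t v)
;;   {
;;     return vgetq_lane_s16 (v, 2);   /* may become: smov w0, v0.h[2] */
;;   }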
3012
3013 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3014 [(set (match_operand:SI 0 "register_operand" "=r")
3015 (zero_extend:SI
3016 (vec_select:<VEL>
3017 (match_operand:VDQQH 1 "register_operand" "w")
3018 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3019 "TARGET_SIMD"
3020 {
3021 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3022 return "umov\\t%w0, %1.<Vetype>[%2]";
3023 }
3024 [(set_attr "type" "neon_to_gp<q>")]
3025 )
3026
3027 ;; Lane extraction of a value: neither sign nor zero extension
3028 ;; is guaranteed, so the upper bits should be considered undefined.
3029 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3030 (define_insn "aarch64_get_lane<mode>"
3031 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3032 (vec_select:<VEL>
3033 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3034 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3035 "TARGET_SIMD"
3036 {
3037 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3038 switch (which_alternative)
3039 {
3040 case 0:
3041 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3042 case 1:
3043 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3044 case 2:
3045 return "st1\\t{%1.<Vetype>}[%2], %0";
3046 default:
3047 gcc_unreachable ();
3048 }
3049 }
3050 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3051 )
3052
3053 (define_insn "load_pair_lanes<mode>"
3054 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3055 (vec_concat:<VDBL>
3056 (match_operand:VDC 1 "memory_operand" "Utq")
3057 (match_operand:VDC 2 "memory_operand" "m")))]
3058 "TARGET_SIMD && !STRICT_ALIGNMENT
3059 && rtx_equal_p (XEXP (operands[2], 0),
3060 plus_constant (Pmode,
3061 XEXP (operands[1], 0),
3062 GET_MODE_SIZE (<MODE>mode)))"
3063 "ldr\\t%q0, %1"
3064 [(set_attr "type" "neon_load1_1reg_q")]
3065 )
3066
3067 (define_insn "store_pair_lanes<mode>"
3068 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3069 (vec_concat:<VDBL>
3070 (match_operand:VDC 1 "register_operand" "w, r")
3071 (match_operand:VDC 2 "register_operand" "w, r")))]
3072 "TARGET_SIMD"
3073 "@
3074 stp\\t%d1, %d2, %y0
3075 stp\\t%x1, %x2, %y0"
3076 [(set_attr "type" "neon_stp, store_16")]
3077 )
3078
3079 ;; In this insn, operand 1 is the low half and operand 2 the high half
3080 ;; of the destination vector.
3081
3082 (define_insn "*aarch64_combinez<mode>"
3083 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3084 (vec_concat:<VDBL>
3085 (match_operand:VDC 1 "general_operand" "w,?r,m")
3086 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3087 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3088 "@
3089 mov\\t%0.8b, %1.8b
3090 fmov\t%d0, %1
3091 ldr\\t%d0, %1"
3092 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3093 (set_attr "simd" "yes,*,yes")
3094 (set_attr "fp" "*,yes,*")]
3095 )
3096
3097 (define_insn "*aarch64_combinez_be<mode>"
3098 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3099 (vec_concat:<VDBL>
3100 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3101 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3102 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3103 "@
3104 mov\\t%0.8b, %1.8b
3105 fmov\t%d0, %1
3106 ldr\\t%d0, %1"
3107 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3108 (set_attr "simd" "yes,*,yes")
3109 (set_attr "fp" "*,yes,*")]
3110 )
3111
3112 (define_expand "aarch64_combine<mode>"
3113 [(match_operand:<VDBL> 0 "register_operand")
3114 (match_operand:VDC 1 "register_operand")
3115 (match_operand:VDC 2 "register_operand")]
3116 "TARGET_SIMD"
3117 {
3118 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3119
3120 DONE;
3121 }
3122 )
3123
3124 (define_expand "aarch64_simd_combine<mode>"
3125 [(match_operand:<VDBL> 0 "register_operand")
3126 (match_operand:VDC 1 "register_operand")
3127 (match_operand:VDC 2 "register_operand")]
3128 "TARGET_SIMD"
3129 {
3130 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3131 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3132 DONE;
3133 }
3134 [(set_attr "type" "multiple")]
3135 )
3136
3137 ;; <su><addsub>l<q>.
3138
3139 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3140 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3141 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3142 (match_operand:VQW 1 "register_operand" "w")
3143 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3144 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3145 (match_operand:VQW 2 "register_operand" "w")
3146 (match_dup 3)))))]
3147 "TARGET_SIMD"
3148 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3149 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3150 )
3151
3152 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3153 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3155 (match_operand:VQW 1 "register_operand" "w")
3156 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3157 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3158 (match_operand:VQW 2 "register_operand" "w")
3159 (match_dup 3)))))]
3160 "TARGET_SIMD"
3161 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3162 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3163 )
3164
3165
3166 (define_expand "aarch64_saddl2<mode>"
3167 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3168 (match_operand:VQW 1 "register_operand" "w")
3169 (match_operand:VQW 2 "register_operand" "w")]
3170 "TARGET_SIMD"
3171 {
3172 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3173 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3174 operands[2], p));
3175 DONE;
3176 })
3177
3178 (define_expand "aarch64_uaddl2<mode>"
3179 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3180 (match_operand:VQW 1 "register_operand" "w")
3181 (match_operand:VQW 2 "register_operand" "w")]
3182 "TARGET_SIMD"
3183 {
3184 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3185 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3186 operands[2], p));
3187 DONE;
3188 })
3189
3190 (define_expand "aarch64_ssubl2<mode>"
3191 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3192 (match_operand:VQW 1 "register_operand" "w")
3193 (match_operand:VQW 2 "register_operand" "w")]
3194 "TARGET_SIMD"
3195 {
3196 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3197 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3198 operands[2], p));
3199 DONE;
3200 })
3201
3202 (define_expand "aarch64_usubl2<mode>"
3203 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3204 (match_operand:VQW 1 "register_operand" "w")
3205 (match_operand:VQW 2 "register_operand" "w")]
3206 "TARGET_SIMD"
3207 {
3208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3209 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3210 operands[2], p));
3211 DONE;
3212 })
3213
3214 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3215 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3216 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3217 (match_operand:VD_BHSI 1 "register_operand" "w"))
3218 (ANY_EXTEND:<VWIDE>
3219 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3220 "TARGET_SIMD"
3221 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3222 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3223 )
3224
3225 ;; <su><addsub>w<q>.
3226
3227 (define_expand "widen_ssum<mode>3"
3228 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3229 (plus:<VDBLW> (sign_extend:<VDBLW>
3230 (match_operand:VQW 1 "register_operand" ""))
3231 (match_operand:<VDBLW> 2 "register_operand" "")))]
3232 "TARGET_SIMD"
3233 {
3234 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3235 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3236
3237 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3238 operands[1], p));
3239 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3240 DONE;
3241 }
3242 )
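
;; Schematically, for a V8HI input accumulated into a V4SI result, the
;; expansion above becomes (register names illustrative only):
;;   saddw  vT.4s, vACC.4s, vIN.4h    // acc + sign_extend (low half)
;;   saddw2 v0.4s, vT.4s, vIN.8h      // plus sign_extend (high half)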
3243
3244 (define_expand "widen_ssum<mode>3"
3245 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3246 (plus:<VWIDE> (sign_extend:<VWIDE>
3247 (match_operand:VD_BHSI 1 "register_operand" ""))
3248 (match_operand:<VWIDE> 2 "register_operand" "")))]
3249 "TARGET_SIMD"
3250 {
3251 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3252 DONE;
3253 })
3254
3255 (define_expand "widen_usum<mode>3"
3256 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3257 (plus:<VDBLW> (zero_extend:<VDBLW>
3258 (match_operand:VQW 1 "register_operand" ""))
3259 (match_operand:<VDBLW> 2 "register_operand" "")))]
3260 "TARGET_SIMD"
3261 {
3262 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3263 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3264
3265 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3266 operands[1], p));
3267 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3268 DONE;
3269 }
3270 )
3271
3272 (define_expand "widen_usum<mode>3"
3273 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3274 (plus:<VWIDE> (zero_extend:<VWIDE>
3275 (match_operand:VD_BHSI 1 "register_operand" ""))
3276 (match_operand:<VWIDE> 2 "register_operand" "")))]
3277 "TARGET_SIMD"
3278 {
3279 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3280 DONE;
3281 })
3282
3283 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3284 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3285 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3286 (ANY_EXTEND:<VWIDE>
3287 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3288 "TARGET_SIMD"
3289 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3290 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3291 )
3292
3293 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3294 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3295 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3296 (ANY_EXTEND:<VWIDE>
3297 (vec_select:<VHALF>
3298 (match_operand:VQW 2 "register_operand" "w")
3299 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3300 "TARGET_SIMD"
3301 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3302 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3303 )
3304
3305 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3307 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3308 (ANY_EXTEND:<VWIDE>
3309 (vec_select:<VHALF>
3310 (match_operand:VQW 2 "register_operand" "w")
3311 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3312 "TARGET_SIMD"
3313 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3314 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3315 )
3316
3317 (define_expand "aarch64_saddw2<mode>"
3318 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3319 (match_operand:<VWIDE> 1 "register_operand" "w")
3320 (match_operand:VQW 2 "register_operand" "w")]
3321 "TARGET_SIMD"
3322 {
3323 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3324 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3325 operands[2], p));
3326 DONE;
3327 })
3328
3329 (define_expand "aarch64_uaddw2<mode>"
3330 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3331 (match_operand:<VWIDE> 1 "register_operand" "w")
3332 (match_operand:VQW 2 "register_operand" "w")]
3333 "TARGET_SIMD"
3334 {
3335 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3336 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3337 operands[2], p));
3338 DONE;
3339 })
3340
3342 (define_expand "aarch64_ssubw2<mode>"
3343 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3344 (match_operand:<VWIDE> 1 "register_operand" "w")
3345 (match_operand:VQW 2 "register_operand" "w")]
3346 "TARGET_SIMD"
3347 {
3348 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3349 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3350 operands[2], p));
3351 DONE;
3352 })
3353
3354 (define_expand "aarch64_usubw2<mode>"
3355 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3356 (match_operand:<VWIDE> 1 "register_operand" "w")
3357 (match_operand:VQW 2 "register_operand" "w")]
3358 "TARGET_SIMD"
3359 {
3360 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3361 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3362 operands[2], p));
3363 DONE;
3364 })
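
;; As a sketch of what the expanders above construct: for V8HImode,
;; aarch64_simd_vect_par_cnst_half (V8HImode, 8, true) should yield
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;; so the vec_select in the *_internal patterns picks out the high half.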
3365
3366 ;; <su><r>h<addsub>.
3367
3368 (define_insn "aarch64_<sur>h<addsub><mode>"
3369 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3370 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3371 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3372 HADDSUB))]
3373 "TARGET_SIMD"
3374 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3375 [(set_attr "type" "neon_<addsub>_halve<q>")]
3376 )
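
;; Per-lane semantics, for illustration: shadd computes (a + b) >> 1
;; with the addition done at double width, so it cannot overflow, and
;; the rounding forms add one first: srhadd is (a + b + 1) >> 1.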
3377
3378 ;; <r><addsub>hn<q>.
3379
3380 (define_insn "aarch64_<sur><addsub>hn<mode>"
3381 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3382 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3383 (match_operand:VQN 2 "register_operand" "w")]
3384 ADDSUBHN))]
3385 "TARGET_SIMD"
3386 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3387 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3388 )
3389
3390 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3391 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3392 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3393 (match_operand:VQN 2 "register_operand" "w")
3394 (match_operand:VQN 3 "register_operand" "w")]
3395 ADDSUBHN2))]
3396 "TARGET_SIMD"
3397 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3398 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3399 )
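
;; For 2n-bit input lanes, addhn keeps the high n bits of the sum,
;; (a + b) >> n, while raddhn rounds first:
;; (a + b + (1 << (n - 1))) >> n.  The subhn/rsubhn forms do the same
;; for a - b.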
3400
3401 ;; pmul.
3402
3403 (define_insn "aarch64_pmul<mode>"
3404 [(set (match_operand:VB 0 "register_operand" "=w")
3405 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3406 (match_operand:VB 2 "register_operand" "w")]
3407 UNSPEC_PMUL))]
3408 "TARGET_SIMD"
3409 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3410 [(set_attr "type" "neon_mul_<Vetype><q>")]
3411 )
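
;; pmul is a polynomial (carry-less) multiply over GF(2): partial
;; products are combined with XOR rather than addition, so for example
;; pmul (0x03, 0x03) gives 0x05 where an ordinary mul would give 0x09.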
3412
3413 ;; fmulx.
3414
3415 (define_insn "aarch64_fmulx<mode>"
3416 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3417 (unspec:VHSDF_HSDF
3418 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3419 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3420 UNSPEC_FMULX))]
3421 "TARGET_SIMD"
3422 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3423 [(set_attr "type" "neon_fp_mul_<stype>")]
3424 )
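
;; fmulx differs from fmul only in that (+/-)0.0 * (+/-)infinity
;; returns (+/-)2.0 instead of NaN, which is why it is represented as
;; an unspec rather than as a plain mult.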
3425
3426 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3427
3428 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3429 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3430 (unspec:VDQSF
3431 [(match_operand:VDQSF 1 "register_operand" "w")
3432 (vec_duplicate:VDQSF
3433 (vec_select:<VEL>
3434 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3435 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3436 UNSPEC_FMULX))]
3437 "TARGET_SIMD"
3438 {
3439 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3440 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3441 }
3442 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3443 )
3444
3445 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3446
3447 (define_insn "*aarch64_mulx_elt<mode>"
3448 [(set (match_operand:VDQF 0 "register_operand" "=w")
3449 (unspec:VDQF
3450 [(match_operand:VDQF 1 "register_operand" "w")
3451 (vec_duplicate:VDQF
3452 (vec_select:<VEL>
3453 (match_operand:VDQF 2 "register_operand" "w")
3454 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3455 UNSPEC_FMULX))]
3456 "TARGET_SIMD"
3457 {
3458 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3459 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3460 }
3461 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3462 )
3463
3464 ;; vmulxq_lane
3465
3466 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3467 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3468 (unspec:VHSDF
3469 [(match_operand:VHSDF 1 "register_operand" "w")
3470 (vec_duplicate:VHSDF
3471 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3472 UNSPEC_FMULX))]
3473 "TARGET_SIMD"
3474 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3475 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3476 )
3477
3478 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3479 ;; vmulxd_lane_f64 == vmulx_lane_f64
3480 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3481
3482 (define_insn "*aarch64_vgetfmulx<mode>"
3483 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3484 (unspec:<VEL>
3485 [(match_operand:<VEL> 1 "register_operand" "w")
3486 (vec_select:<VEL>
3487 (match_operand:VDQF 2 "register_operand" "w")
3488 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3489 UNSPEC_FMULX))]
3490 "TARGET_SIMD"
3491 {
3492 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3493 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3494 }
3495 [(set_attr "type" "fmul<Vetype>")]
3496 )
3497 ;; <su>q<addsub>
3498
3499 (define_insn "aarch64_<su_optab><optab><mode>"
3500 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3501 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3502 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3503 "TARGET_SIMD"
3504 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3505 [(set_attr "type" "neon_<optab><q>")]
3506 )
3507
3508 ;; suqadd and usqadd
3509
3510 (define_insn "aarch64_<sur>qadd<mode>"
3511 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3512 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3513 (match_operand:VSDQ_I 2 "register_operand" "w")]
3514 USSUQADD))]
3515 "TARGET_SIMD"
3516 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3517 [(set_attr "type" "neon_qadd<q>")]
3518 )
3519
3520 ;; sqmovun
3521
3522 (define_insn "aarch64_sqmovun<mode>"
3523 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3524 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3525 UNSPEC_SQXTUN))]
3526 "TARGET_SIMD"
3527 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3528 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3529 )
3530
3531 ;; sqmovn and uqmovn
3532
3533 (define_insn "aarch64_<sur>qmovn<mode>"
3534 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3535 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3536 SUQMOVN))]
3537 "TARGET_SIMD"
3538 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3539 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3540 )
3541
3542 ;; <su>q<absneg>
3543
3544 (define_insn "aarch64_s<optab><mode>"
3545 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3546 (UNQOPS:VSDQ_I
3547 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3548 "TARGET_SIMD"
3549 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3550 [(set_attr "type" "neon_<optab><q>")]
3551 )
3552
3553 ;; sq<r>dmulh.
3554
3555 (define_insn "aarch64_sq<r>dmulh<mode>"
3556 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3557 (unspec:VSDQ_HSI
3558 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3559 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3560 VQDMULH))]
3561 "TARGET_SIMD"
3562 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3563 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3564 )
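
;; Per-lane semantics with n-bit elements, for illustration:
;;   sqdmulh:  sat ((2 * a * b) >> n)
;;   sqrdmulh: sat ((2 * a * b + (1 << (n - 1))) >> n)
;; hence the "doubling multiply, returning high half" naming.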
3565
3566 ;; sq<r>dmulh_lane
3567
3568 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3569 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3570 (unspec:VDQHS
3571 [(match_operand:VDQHS 1 "register_operand" "w")
3572 (vec_select:<VEL>
3573 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3574 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3575 VQDMULH))]
3576 "TARGET_SIMD"
3577 "*
3578 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3579 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3580 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3581 )
3582
3583 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3584 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3585 (unspec:VDQHS
3586 [(match_operand:VDQHS 1 "register_operand" "w")
3587 (vec_select:<VEL>
3588 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3589 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3590 VQDMULH))]
3591 "TARGET_SIMD"
3592 "*
3593 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3594 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3595 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3596 )
3597
3598 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3599 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3600 (unspec:SD_HSI
3601 [(match_operand:SD_HSI 1 "register_operand" "w")
3602 (vec_select:<VEL>
3603 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3604 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3605 VQDMULH))]
3606 "TARGET_SIMD"
3607 "*
3608 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3609 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3610 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3611 )
3612
3613 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3614 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3615 (unspec:SD_HSI
3616 [(match_operand:SD_HSI 1 "register_operand" "w")
3617 (vec_select:<VEL>
3618 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3619 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3620 VQDMULH))]
3621 "TARGET_SIMD"
3622 "*
3623 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3624 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3625 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3626 )
3627
3628 ;; sqrdml[as]h.
3629
3630 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3631 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3632 (unspec:VSDQ_HSI
3633 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3634 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3635 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3636 SQRDMLH_AS))]
3637 "TARGET_SIMD_RDMA"
3638 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3639 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3640 )
3641
3642 ;; sqrdml[as]h_lane.
3643
3644 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3645 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3646 (unspec:VDQHS
3647 [(match_operand:VDQHS 1 "register_operand" "0")
3648 (match_operand:VDQHS 2 "register_operand" "w")
3649 (vec_select:<VEL>
3650 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3651 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3652 SQRDMLH_AS))]
3653 "TARGET_SIMD_RDMA"
3654 {
3655 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3656 return
3657 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3658 }
3659 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3660 )
3661
3662 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3663 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3664 (unspec:SD_HSI
3665 [(match_operand:SD_HSI 1 "register_operand" "0")
3666 (match_operand:SD_HSI 2 "register_operand" "w")
3667 (vec_select:<VEL>
3668 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3669 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3670 SQRDMLH_AS))]
3671 "TARGET_SIMD_RDMA"
3672 {
3673 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3674 return
3675 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3676 }
3677 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3678 )
3679
3680 ;; sqrdml[as]h_laneq.
3681
3682 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3683 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3684 (unspec:VDQHS
3685 [(match_operand:VDQHS 1 "register_operand" "0")
3686 (match_operand:VDQHS 2 "register_operand" "w")
3687 (vec_select:<VEL>
3688 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3689 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3690 SQRDMLH_AS))]
3691 "TARGET_SIMD_RDMA"
3692 {
3693 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3694 return
3695 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3696 }
3697 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3698 )
3699
3700 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3701 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3702 (unspec:SD_HSI
3703 [(match_operand:SD_HSI 1 "register_operand" "0")
3704 (match_operand:SD_HSI 2 "register_operand" "w")
3705 (vec_select:<VEL>
3706 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3707 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3708 SQRDMLH_AS))]
3709 "TARGET_SIMD_RDMA"
3710 {
3711 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3712 return
3713 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3714 }
3715 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3716 )
3717
3718 ;; vqdml[sa]l
3719
3720 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3721 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3722 (SBINQOPS:<VWIDE>
3723 (match_operand:<VWIDE> 1 "register_operand" "0")
3724 (ss_ashift:<VWIDE>
3725 (mult:<VWIDE>
3726 (sign_extend:<VWIDE>
3727 (match_operand:VSD_HSI 2 "register_operand" "w"))
3728 (sign_extend:<VWIDE>
3729 (match_operand:VSD_HSI 3 "register_operand" "w")))
3730 (const_int 1))))]
3731 "TARGET_SIMD"
3732 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3733 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3734 )
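
;; Note how the doubling is modelled above: the widened product is
;; shifted left by one with a saturating ss_ashift and then accumulated
;; with a saturating ss_plus/ss_minus, so for 16-bit inputs a lane is
;; roughly sat (acc +/- sat (2 * (int32) a * (int32) b)).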
3735
3736 ;; vqdml[sa]l_lane
3737
3738 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3739 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3740 (SBINQOPS:<VWIDE>
3741 (match_operand:<VWIDE> 1 "register_operand" "0")
3742 (ss_ashift:<VWIDE>
3743 (mult:<VWIDE>
3744 (sign_extend:<VWIDE>
3745 (match_operand:VD_HSI 2 "register_operand" "w"))
3746 (sign_extend:<VWIDE>
3747 (vec_duplicate:VD_HSI
3748 (vec_select:<VEL>
3749 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3750 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3751 ))
3752 (const_int 1))))]
3753 "TARGET_SIMD"
3754 {
3755 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3756 return
3757 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3758 }
3759 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3760 )
3761
3762 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3763 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3764 (SBINQOPS:<VWIDE>
3765 (match_operand:<VWIDE> 1 "register_operand" "0")
3766 (ss_ashift:<VWIDE>
3767 (mult:<VWIDE>
3768 (sign_extend:<VWIDE>
3769 (match_operand:VD_HSI 2 "register_operand" "w"))
3770 (sign_extend:<VWIDE>
3771 (vec_duplicate:VD_HSI
3772 (vec_select:<VEL>
3773 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3774 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3775 ))
3776 (const_int 1))))]
3777 "TARGET_SIMD"
3778 {
3779 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3780 return
3781 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3782 }
3783 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3784 )
3785
3786 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3787 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3788 (SBINQOPS:<VWIDE>
3789 (match_operand:<VWIDE> 1 "register_operand" "0")
3790 (ss_ashift:<VWIDE>
3791 (mult:<VWIDE>
3792 (sign_extend:<VWIDE>
3793 (match_operand:SD_HSI 2 "register_operand" "w"))
3794 (sign_extend:<VWIDE>
3795 (vec_select:<VEL>
3796 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3797 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3798 )
3799 (const_int 1))))]
3800 "TARGET_SIMD"
3801 {
3802 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3803 return
3804 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3805 }
3806 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3807 )
3808
3809 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3810 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3811 (SBINQOPS:<VWIDE>
3812 (match_operand:<VWIDE> 1 "register_operand" "0")
3813 (ss_ashift:<VWIDE>
3814 (mult:<VWIDE>
3815 (sign_extend:<VWIDE>
3816 (match_operand:SD_HSI 2 "register_operand" "w"))
3817 (sign_extend:<VWIDE>
3818 (vec_select:<VEL>
3819 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3820 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3821 )
3822 (const_int 1))))]
3823 "TARGET_SIMD"
3824 {
3825 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3826 return
3827 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3828 }
3829 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3830 )
3831
3832 ;; vqdml[sa]l_n
3833
3834 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3835 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3836 (SBINQOPS:<VWIDE>
3837 (match_operand:<VWIDE> 1 "register_operand" "0")
3838 (ss_ashift:<VWIDE>
3839 (mult:<VWIDE>
3840 (sign_extend:<VWIDE>
3841 (match_operand:VD_HSI 2 "register_operand" "w"))
3842 (sign_extend:<VWIDE>
3843 (vec_duplicate:VD_HSI
3844 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3845 (const_int 1))))]
3846 "TARGET_SIMD"
3847 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3848 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3849 )
3850
3851 ;; sqdml[as]l2
3852
3853 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3854 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3855 (SBINQOPS:<VWIDE>
3856 (match_operand:<VWIDE> 1 "register_operand" "0")
3857 (ss_ashift:<VWIDE>
3858 (mult:<VWIDE>
3859 (sign_extend:<VWIDE>
3860 (vec_select:<VHALF>
3861 (match_operand:VQ_HSI 2 "register_operand" "w")
3862 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3863 (sign_extend:<VWIDE>
3864 (vec_select:<VHALF>
3865 (match_operand:VQ_HSI 3 "register_operand" "w")
3866 (match_dup 4))))
3867 (const_int 1))))]
3868 "TARGET_SIMD"
3869 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3870 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3871 )
3872
3873 (define_expand "aarch64_sqdmlal2<mode>"
3874 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3875 (match_operand:<VWIDE> 1 "register_operand" "w")
3876 (match_operand:VQ_HSI 2 "register_operand" "w")
3877 (match_operand:VQ_HSI 3 "register_operand" "w")]
3878 "TARGET_SIMD"
3879 {
3880 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3881 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3882 operands[2], operands[3], p));
3883 DONE;
3884 })
3885
3886 (define_expand "aarch64_sqdmlsl2<mode>"
3887 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3888 (match_operand:<VWIDE> 1 "register_operand" "w")
3889 (match_operand:VQ_HSI 2 "register_operand" "w")
3890 (match_operand:VQ_HSI 3 "register_operand" "w")]
3891 "TARGET_SIMD"
3892 {
3893 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3894 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3895 operands[2], operands[3], p));
3896 DONE;
3897 })
3898
3899 ;; vqdml[sa]l2_lane
3900
3901 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3902 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3903 (SBINQOPS:<VWIDE>
3904 (match_operand:<VWIDE> 1 "register_operand" "0")
3905 (ss_ashift:<VWIDE>
3906 (mult:<VWIDE>
3907 (sign_extend:<VWIDE>
3908 (vec_select:<VHALF>
3909 (match_operand:VQ_HSI 2 "register_operand" "w")
3910 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3911 (sign_extend:<VWIDE>
3912 (vec_duplicate:<VHALF>
3913 (vec_select:<VEL>
3914 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3915 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3916 ))))
3917 (const_int 1))))]
3918 "TARGET_SIMD"
3919 {
3920 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3921 return
3922 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3923 }
3924 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3925 )
3926
3927 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3928 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3929 (SBINQOPS:<VWIDE>
3930 (match_operand:<VWIDE> 1 "register_operand" "0")
3931 (ss_ashift:<VWIDE>
3932 (mult:<VWIDE>
3933 (sign_extend:<VWIDE>
3934 (vec_select:<VHALF>
3935 (match_operand:VQ_HSI 2 "register_operand" "w")
3936 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3937 (sign_extend:<VWIDE>
3938 (vec_duplicate:<VHALF>
3939 (vec_select:<VEL>
3940 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3941 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3942 ))))
3943 (const_int 1))))]
3944 "TARGET_SIMD"
3945 {
3946 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3947 return
3948 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3949 }
3950 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3951 )
3952
3953 (define_expand "aarch64_sqdmlal2_lane<mode>"
3954 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3955 (match_operand:<VWIDE> 1 "register_operand" "w")
3956 (match_operand:VQ_HSI 2 "register_operand" "w")
3957 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3958 (match_operand:SI 4 "immediate_operand" "i")]
3959 "TARGET_SIMD"
3960 {
3961 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3962 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3963 operands[2], operands[3],
3964 operands[4], p));
3965 DONE;
3966 })
3967
3968 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3969 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3970 (match_operand:<VWIDE> 1 "register_operand" "w")
3971 (match_operand:VQ_HSI 2 "register_operand" "w")
3972 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3973 (match_operand:SI 4 "immediate_operand" "i")]
3974 "TARGET_SIMD"
3975 {
3976 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3977 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3978 operands[2], operands[3],
3979 operands[4], p));
3980 DONE;
3981 })
3982
3983 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3984 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3985 (match_operand:<VWIDE> 1 "register_operand" "w")
3986 (match_operand:VQ_HSI 2 "register_operand" "w")
3987 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3988 (match_operand:SI 4 "immediate_operand" "i")]
3989 "TARGET_SIMD"
3990 {
3991 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3992 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3993 operands[2], operands[3],
3994 operands[4], p));
3995 DONE;
3996 })
3997
3998 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3999 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4000 (match_operand:<VWIDE> 1 "register_operand" "w")
4001 (match_operand:VQ_HSI 2 "register_operand" "w")
4002 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4003 (match_operand:SI 4 "immediate_operand" "i")]
4004 "TARGET_SIMD"
4005 {
4006 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4007 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4008 operands[2], operands[3],
4009 operands[4], p));
4010 DONE;
4011 })
4012
4013 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4014 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4015 (SBINQOPS:<VWIDE>
4016 (match_operand:<VWIDE> 1 "register_operand" "0")
4017 (ss_ashift:<VWIDE>
4018 (mult:<VWIDE>
4019 (sign_extend:<VWIDE>
4020 (vec_select:<VHALF>
4021 (match_operand:VQ_HSI 2 "register_operand" "w")
4022 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4023 (sign_extend:<VWIDE>
4024 (vec_duplicate:<VHALF>
4025 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4026 (const_int 1))))]
4027 "TARGET_SIMD"
4028 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4029 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4030 )
4031
4032 (define_expand "aarch64_sqdmlal2_n<mode>"
4033 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4034 (match_operand:<VWIDE> 1 "register_operand" "w")
4035 (match_operand:VQ_HSI 2 "register_operand" "w")
4036 (match_operand:<VEL> 3 "register_operand" "w")]
4037 "TARGET_SIMD"
4038 {
4039 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4040 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4041 operands[2], operands[3],
4042 p));
4043 DONE;
4044 })
4045
4046 (define_expand "aarch64_sqdmlsl2_n<mode>"
4047 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4048 (match_operand:<VWIDE> 1 "register_operand" "w")
4049 (match_operand:VQ_HSI 2 "register_operand" "w")
4050 (match_operand:<VEL> 3 "register_operand" "w")]
4051 "TARGET_SIMD"
4052 {
4053 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4054 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4055 operands[2], operands[3],
4056 p));
4057 DONE;
4058 })
4059
4060 ;; vqdmull
4061
4062 (define_insn "aarch64_sqdmull<mode>"
4063 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4064 (ss_ashift:<VWIDE>
4065 (mult:<VWIDE>
4066 (sign_extend:<VWIDE>
4067 (match_operand:VSD_HSI 1 "register_operand" "w"))
4068 (sign_extend:<VWIDE>
4069 (match_operand:VSD_HSI 2 "register_operand" "w")))
4070 (const_int 1)))]
4071 "TARGET_SIMD"
4072 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4073 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4074 )
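
;; For example, vqdmull_s16 maps here: each 32-bit result lane is
;; sat (2 * (int32) a[i] * (int32) b[i]), where saturation can only
;; trigger for the INT16_MIN * INT16_MIN case.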
4075
4076 ;; vqdmull_lane
4077
4078 (define_insn "aarch64_sqdmull_lane<mode>"
4079 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4080 (ss_ashift:<VWIDE>
4081 (mult:<VWIDE>
4082 (sign_extend:<VWIDE>
4083 (match_operand:VD_HSI 1 "register_operand" "w"))
4084 (sign_extend:<VWIDE>
4085 (vec_duplicate:VD_HSI
4086 (vec_select:<VEL>
4087 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4088 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4089 ))
4090 (const_int 1)))]
4091 "TARGET_SIMD"
4092 {
4093 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4094 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4095 }
4096 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4097 )
4098
4099 (define_insn "aarch64_sqdmull_laneq<mode>"
4100 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4101 (ss_ashift:<VWIDE>
4102 (mult:<VWIDE>
4103 (sign_extend:<VWIDE>
4104 (match_operand:VD_HSI 1 "register_operand" "w"))
4105 (sign_extend:<VWIDE>
4106 (vec_duplicate:VD_HSI
4107 (vec_select:<VEL>
4108 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4109 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4110 ))
4111 (const_int 1)))]
4112 "TARGET_SIMD"
4113 {
4114 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4115 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4116 }
4117 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4118 )
4119
4120 (define_insn "aarch64_sqdmull_lane<mode>"
4121 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4122 (ss_ashift:<VWIDE>
4123 (mult:<VWIDE>
4124 (sign_extend:<VWIDE>
4125 (match_operand:SD_HSI 1 "register_operand" "w"))
4126 (sign_extend:<VWIDE>
4127 (vec_select:<VEL>
4128 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4129 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4130 ))
4131 (const_int 1)))]
4132 "TARGET_SIMD"
4133 {
4134 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4135 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4136 }
4137 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4138 )
4139
4140 (define_insn "aarch64_sqdmull_laneq<mode>"
4141 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4142 (ss_ashift:<VWIDE>
4143 (mult:<VWIDE>
4144 (sign_extend:<VWIDE>
4145 (match_operand:SD_HSI 1 "register_operand" "w"))
4146 (sign_extend:<VWIDE>
4147 (vec_select:<VEL>
4148 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4149 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4150 ))
4151 (const_int 1)))]
4152 "TARGET_SIMD"
4153 {
4154 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4155 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4156 }
4157 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4158 )
4159
4160 ;; vqdmull_n
4161
4162 (define_insn "aarch64_sqdmull_n<mode>"
4163 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4164 (ss_ashift:<VWIDE>
4165 (mult:<VWIDE>
4166 (sign_extend:<VWIDE>
4167 (match_operand:VD_HSI 1 "register_operand" "w"))
4168 (sign_extend:<VWIDE>
4169 (vec_duplicate:VD_HSI
4170 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4171 )
4172 (const_int 1)))]
4173 "TARGET_SIMD"
4174 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4175 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4176 )
4177
4178 ;; vqdmull2
4179
4180
4182 (define_insn "aarch64_sqdmull2<mode>_internal"
4183 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4184 (ss_ashift:<VWIDE>
4185 (mult:<VWIDE>
4186 (sign_extend:<VWIDE>
4187 (vec_select:<VHALF>
4188 (match_operand:VQ_HSI 1 "register_operand" "w")
4189 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4190 (sign_extend:<VWIDE>
4191 (vec_select:<VHALF>
4192 (match_operand:VQ_HSI 2 "register_operand" "w")
4193 (match_dup 3)))
4194 )
4195 (const_int 1)))]
4196 "TARGET_SIMD"
4197 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4198 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4199 )
4200
4201 (define_expand "aarch64_sqdmull2<mode>"
4202 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4203 (match_operand:VQ_HSI 1 "register_operand" "w")
4204 (match_operand:VQ_HSI 2 "register_operand" "w")]
4205 "TARGET_SIMD"
4206 {
4207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4208 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4209 operands[2], p));
4210 DONE;
4211 })
4212
4213 ;; vqdmull2_lane
4214
4215 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4216 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4217 (ss_ashift:<VWIDE>
4218 (mult:<VWIDE>
4219 (sign_extend:<VWIDE>
4220 (vec_select:<VHALF>
4221 (match_operand:VQ_HSI 1 "register_operand" "w")
4222 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4223 (sign_extend:<VWIDE>
4224 (vec_duplicate:<VHALF>
4225 (vec_select:<VEL>
4226 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4227 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4228 ))
4229 (const_int 1)))]
4230 "TARGET_SIMD"
4231 {
4232 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4233 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4234 }
4235 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4236 )
4237
4238 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4239 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4240 (ss_ashift:<VWIDE>
4241 (mult:<VWIDE>
4242 (sign_extend:<VWIDE>
4243 (vec_select:<VHALF>
4244 (match_operand:VQ_HSI 1 "register_operand" "w")
4245 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4246 (sign_extend:<VWIDE>
4247 (vec_duplicate:<VHALF>
4248 (vec_select:<VEL>
4249 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4250 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4251 ))
4252 (const_int 1)))]
4253 "TARGET_SIMD"
4254 {
4255 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4256 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4257 }
4258 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4259 )
4260
4261 (define_expand "aarch64_sqdmull2_lane<mode>"
4262 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4263 (match_operand:VQ_HSI 1 "register_operand" "w")
4264 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4265 (match_operand:SI 3 "immediate_operand" "i")]
4266 "TARGET_SIMD"
4267 {
4268 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4269 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4270 operands[2], operands[3],
4271 p));
4272 DONE;
4273 })
4274
4275 (define_expand "aarch64_sqdmull2_laneq<mode>"
4276 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4277 (match_operand:VQ_HSI 1 "register_operand" "w")
4278 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4279 (match_operand:SI 3 "immediate_operand" "i")]
4280 "TARGET_SIMD"
4281 {
4282 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4283 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4284 operands[2], operands[3],
4285 p));
4286 DONE;
4287 })
4288
4289 ;; vqdmull2_n
4290
4291 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4292 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4293 (ss_ashift:<VWIDE>
4294 (mult:<VWIDE>
4295 (sign_extend:<VWIDE>
4296 (vec_select:<VHALF>
4297 (match_operand:VQ_HSI 1 "register_operand" "w")
4298 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4299 (sign_extend:<VWIDE>
4300 (vec_duplicate:<VHALF>
4301 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4302 )
4303 (const_int 1)))]
4304 "TARGET_SIMD"
4305 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4306 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4307 )
4308
4309 (define_expand "aarch64_sqdmull2_n<mode>"
4310 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4311 (match_operand:VQ_HSI 1 "register_operand" "w")
4312 (match_operand:<VEL> 2 "register_operand" "w")]
4313 "TARGET_SIMD"
4314 {
4315 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4316 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4317 operands[2], p));
4318 DONE;
4319 })
4320
4321 ;; vshl
4322
4323 (define_insn "aarch64_<sur>shl<mode>"
4324 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4325 (unspec:VSDQ_I_DI
4326 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4327 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4328 VSHL))]
4329 "TARGET_SIMD"
4330 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4331 [(set_attr "type" "neon_shift_reg<q>")]
4332 )
4333
4335 ;; vqshl
4336
4337 (define_insn "aarch64_<sur>q<r>shl<mode>"
4338 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4339 (unspec:VSDQ_I
4340 [(match_operand:VSDQ_I 1 "register_operand" "w")
4341 (match_operand:VSDQ_I 2 "register_operand" "w")]
4342 VQSHL))]
4343 "TARGET_SIMD"
4344 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4345 [(set_attr "type" "neon_sat_shift_reg<q>")]
4346 )
4347
4348 ;; vshll_n
4349
4350 (define_insn "aarch64_<sur>shll_n<mode>"
4351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4352 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4353 (match_operand:SI 2
4354 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4355 VSHLL))]
4356 "TARGET_SIMD"
4357 {
4358 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4359 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4360 else
4361 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4362 }
4363 [(set_attr "type" "neon_shift_imm_long")]
4364 )
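
;; The special case above exists because sshll/ushll can only encode
;; shift amounts smaller than the element width; a shift equal to the
;; element width is the separate SHLL instruction, e.g.
;;   shll v0.8h, v1.8b, #8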
4365
4366 ;; vshll_high_n
4367
4368 (define_insn "aarch64_<sur>shll2_n<mode>"
4369 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4370 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4371 (match_operand:SI 2 "immediate_operand" "i")]
4372 VSHLL))]
4373 "TARGET_SIMD"
4374 {
4375 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4376 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4377 else
4378 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4379 }
4380 [(set_attr "type" "neon_shift_imm_long")]
4381 )
4382
4383 ;; vrshr_n
4384
4385 (define_insn "aarch64_<sur>shr_n<mode>"
4386 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4387 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4388 (match_operand:SI 2
4389 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4390 VRSHR_N))]
4391 "TARGET_SIMD"
4392 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4393 [(set_attr "type" "neon_sat_shift_imm<q>")]
4394 )
4395
4396 ;; v(r)sra_n
4397
4398 (define_insn "aarch64_<sur>sra_n<mode>"
4399 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4400 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4401 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4402 (match_operand:SI 3
4403 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4404 VSRA))]
4405 "TARGET_SIMD"
4406 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4407 [(set_attr "type" "neon_shift_acc<q>")]
4408 )
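
;; For illustration, vrsra_n_u32 (acc, x, 2) maps here and computes
;; acc[i] + ((x[i] + 2) >> 2) per lane, the +2 being the rounding
;; constant 1 << (shift - 1).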
4409
4410 ;; vs<lr>i_n
4411
4412 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4413 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4414 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4415 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4416 (match_operand:SI 3
4417 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4418 VSLRI))]
4419 "TARGET_SIMD"
4420 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4421 [(set_attr "type" "neon_shift_imm<q>")]
4422 )
4423
4424 ;; vqshl(u)
4425
4426 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4427 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4428 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4429 (match_operand:SI 2
4430 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4431 VQSHL_N))]
4432 "TARGET_SIMD"
4433 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4434 [(set_attr "type" "neon_sat_shift_imm<q>")]
4435 )
4436
4438 ;; vq(r)shr(u)n_n
4439
4440 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4441 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4442 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4443 (match_operand:SI 2
4444 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4445 VQSHRN_N))]
4446 "TARGET_SIMD"
4447 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4448 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4449 )
4450
4452 ;; cm(eq|ge|gt|lt|le)
4453 ;; Note: we have constraints for both Dz and Z, as different expanders
4454 ;; have different ideas of what should be passed to this pattern.
4455
4456 (define_insn "aarch64_cm<optab><mode>"
4457 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4458 (neg:<V_INT_EQUIV>
4459 (COMPARISONS:<V_INT_EQUIV>
4460 (match_operand:VDQ_I 1 "register_operand" "w,w")
4461 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4462 )))]
4463 "TARGET_SIMD"
4464 "@
4465 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4466 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4467 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4468 )
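
;; For illustration, a V4SImode "lt" comparison uses <cmp_1>/<cmp_2> to
;; swap the operands, emitting either
;;   cmgt v0.4s, v2.4s, v1.4s
;; or, when operand 2 is the zero that ZDz accepts,
;;   cmlt v0.4s, v1.4s, #0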
4469
4470 (define_insn_and_split "aarch64_cm<optab>di"
4471 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4472 (neg:DI
4473 (COMPARISONS:DI
4474 (match_operand:DI 1 "register_operand" "w,w,r")
4475 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4476 )))
4477 (clobber (reg:CC CC_REGNUM))]
4478 "TARGET_SIMD"
4479 "#"
4480 "&& reload_completed"
4481 [(set (match_operand:DI 0 "register_operand")
4482 (neg:DI
4483 (COMPARISONS:DI
4484 (match_operand:DI 1 "register_operand")
4485 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4486 )))]
4487 {
4488 /* If we are in the general purpose register file,
4489 we split to a sequence of comparison and store. */
4490 if (GP_REGNUM_P (REGNO (operands[0]))
4491 && GP_REGNUM_P (REGNO (operands[1])))
4492 {
4493 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4494 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4495 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4496 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4497 DONE;
4498 }
4499 /* Otherwise, we expand to a similar pattern which does not
4500 clobber CC_REGNUM. */
4501 }
4502 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4503 )
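
;; When the result does end up in the general register file, the split
;; above becomes, roughly, a compare plus an inverted conditional-set
;; mask:
;;   cmp   x1, x2
;;   csetm x0, ge
;; rather than forcing the operands across to the vector registers.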
4504
4505 (define_insn "*aarch64_cm<optab>di"
4506 [(set (match_operand:DI 0 "register_operand" "=w,w")
4507 (neg:DI
4508 (COMPARISONS:DI
4509 (match_operand:DI 1 "register_operand" "w,w")
4510 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4511 )))]
4512 "TARGET_SIMD && reload_completed"
4513 "@
4514 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4515 cm<optab>\t%d0, %d1, #0"
4516 [(set_attr "type" "neon_compare, neon_compare_zero")]
4517 )
4518
4519 ;; cm(hs|hi)
4520
4521 (define_insn "aarch64_cm<optab><mode>"
4522 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4523 (neg:<V_INT_EQUIV>
4524 (UCOMPARISONS:<V_INT_EQUIV>
4525 (match_operand:VDQ_I 1 "register_operand" "w")
4526 (match_operand:VDQ_I 2 "register_operand" "w")
4527 )))]
4528 "TARGET_SIMD"
4529 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4530 [(set_attr "type" "neon_compare<q>")]
4531 )
4532
4533 (define_insn_and_split "aarch64_cm<optab>di"
4534 [(set (match_operand:DI 0 "register_operand" "=w,r")
4535 (neg:DI
4536 (UCOMPARISONS:DI
4537 (match_operand:DI 1 "register_operand" "w,r")
4538 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4539 )))
4540 (clobber (reg:CC CC_REGNUM))]
4541 "TARGET_SIMD"
4542 "#"
4543 "&& reload_completed"
4544 [(set (match_operand:DI 0 "register_operand")
4545 (neg:DI
4546 (UCOMPARISONS:DI
4547 (match_operand:DI 1 "register_operand")
4548 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4549 )))]
4550 {
4551 /* If we are in the general purpose register file,
4552 we split to a sequence of comparison and store. */
4553 if (GP_REGNUM_P (REGNO (operands[0]))
4554 && GP_REGNUM_P (REGNO (operands[1])))
4555 {
4556 machine_mode mode = CCmode;
4557 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4558 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4559 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4560 DONE;
4561 }
4562 /* Otherwise, we expand to a similar pattern which does not
4563 clobber CC_REGNUM. */
4564 }
4565 [(set_attr "type" "neon_compare,multiple")]
4566 )
4567
4568 (define_insn "*aarch64_cm<optab>di"
4569 [(set (match_operand:DI 0 "register_operand" "=w")
4570 (neg:DI
4571 (UCOMPARISONS:DI
4572 (match_operand:DI 1 "register_operand" "w")
4573 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4574 )))]
4575 "TARGET_SIMD && reload_completed"
4576 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4577 [(set_attr "type" "neon_compare")]
4578 )
4579
4580 ;; cmtst
4581
4582 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4583 ;; we don't have any insns using ne, and aarch64_vcond outputs
4584 ;; not (neg (eq (and x y) 0))
4585 ;; which is rewritten by simplify_rtx as
4586 ;; plus (eq (and x y) 0) -1.
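;; The rewrite is just the two's-complement identity ~(-e) == e - 1;
;; checking per lane, with eq producing 0/1 before the neg: where
;; (x & y) == 0, eq gives 1 and 1 - 1 = 0; where (x & y) != 0, eq gives
;; 0 and 0 - 1 = -1 (all ones), exactly what cmtst must produce.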
4587
4588 (define_insn "aarch64_cmtst<mode>"
4589 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4590 (plus:<V_INT_EQUIV>
4591 (eq:<V_INT_EQUIV>
4592 (and:VDQ_I
4593 (match_operand:VDQ_I 1 "register_operand" "w")
4594 (match_operand:VDQ_I 2 "register_operand" "w"))
4595 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4596 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4597 ]
4598 "TARGET_SIMD"
4599 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4600 [(set_attr "type" "neon_tst<q>")]
4601 )
4602
4603 (define_insn_and_split "aarch64_cmtstdi"
4604 [(set (match_operand:DI 0 "register_operand" "=w,r")
4605 (neg:DI
4606 (ne:DI
4607 (and:DI
4608 (match_operand:DI 1 "register_operand" "w,r")
4609 (match_operand:DI 2 "register_operand" "w,r"))
4610 (const_int 0))))
4611 (clobber (reg:CC CC_REGNUM))]
4612 "TARGET_SIMD"
4613 "#"
4614 "&& reload_completed"
4615 [(set (match_operand:DI 0 "register_operand")
4616 (neg:DI
4617 (ne:DI
4618 (and:DI
4619 (match_operand:DI 1 "register_operand")
4620 (match_operand:DI 2 "register_operand"))
4621 (const_int 0))))]
4622 {
4623 /* If we are in the general purpose register file,
4624 we split to a sequence of comparison and store. */
4625 if (GP_REGNUM_P (REGNO (operands[0]))
4626 && GP_REGNUM_P (REGNO (operands[1])))
4627 {
4628 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4629 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4630 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4631 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4632 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4633 DONE;
4634 }
4635 /* Otherwise, we expand to a similar pattern which does not
4636 clobber CC_REGNUM. */
4637 }
4638 [(set_attr "type" "neon_tst,multiple")]
4639 )
4640
4641 (define_insn "*aarch64_cmtstdi"
4642 [(set (match_operand:DI 0 "register_operand" "=w")
4643 (neg:DI
4644 (ne:DI
4645 (and:DI
4646 (match_operand:DI 1 "register_operand" "w")
4647 (match_operand:DI 2 "register_operand" "w"))
4648 (const_int 0))))]
4649 "TARGET_SIMD"
4650 "cmtst\t%d0, %d1, %d2"
4651 [(set_attr "type" "neon_tst")]
4652 )
4653
4654 ;; fcm(eq|ge|gt|le|lt)
4655
4656 (define_insn "aarch64_cm<optab><mode>"
4657 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4658 (neg:<V_INT_EQUIV>
4659 (COMPARISONS:<V_INT_EQUIV>
4660 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4661 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4662 )))]
4663 "TARGET_SIMD"
4664 "@
4665 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4666 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4667 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4668 )
4669
4670 ;; fac(ge|gt)
4671 ;; Note that we can also handle what would be fac(le|lt) by swapping
4672 ;; the operands and generating fac(ge|gt) instead.
4673
4674 (define_insn "aarch64_fac<optab><mode>"
4675 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4676 (neg:<V_INT_EQUIV>
4677 (FAC_COMPARISONS:<V_INT_EQUIV>
4678 (abs:VHSDF_HSDF
4679 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4680 (abs:VHSDF_HSDF
4681 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4682 )))]
4683 "TARGET_SIMD"
4684 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4685 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4686 )
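
;; e.g. vcaltq_f32 (a, b) tests |a| < |b| and is emitted as
;;   facgt v0.4s, v2.4s, v1.4s
;; with the operand swap done by <cmp_1>/<cmp_2>.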
4687
4688 ;; addp
4689
4690 (define_insn "aarch64_addp<mode>"
4691 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4692 (unspec:VD_BHSI
4693 [(match_operand:VD_BHSI 1 "register_operand" "w")
4694 (match_operand:VD_BHSI 2 "register_operand" "w")]
4695 UNSPEC_ADDP))]
4696 "TARGET_SIMD"
4697 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4698 [(set_attr "type" "neon_reduc_add<q>")]
4699 )
4700
4701 (define_insn "aarch64_addpdi"
4702 [(set (match_operand:DI 0 "register_operand" "=w")
4703 (unspec:DI
4704 [(match_operand:V2DI 1 "register_operand" "w")]
4705 UNSPEC_ADDP))]
4706 "TARGET_SIMD"
4707 "addp\t%d0, %1.2d"
4708 [(set_attr "type" "neon_reduc_add")]
4709 )
4710
4711 ;; sqrt
4712
4713 (define_expand "sqrt<mode>2"
4714 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4715 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4716 "TARGET_SIMD"
4717 {
4718 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4719 DONE;
4720 })
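
;; Note that aarch64_emit_approx_sqrt only succeeds when the tuning and
;; the -ffast-math style flags permit the frsqrte/frsqrts
;; Newton-Raphson sequence; otherwise it returns false and the plain
;; fsqrt insn below is matched.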
4721
4722 (define_insn "*sqrt<mode>2"
4723 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4724 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4725 "TARGET_SIMD"
4726 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4727 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4728 )
4729
4730 ;; Patterns for vector struct loads and stores.
4731
4732 (define_insn "aarch64_simd_ld2<mode>"
4733 [(set (match_operand:OI 0 "register_operand" "=w")
4734 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4735 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4736 UNSPEC_LD2))]
4737 "TARGET_SIMD"
4738 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4739 [(set_attr "type" "neon_load2_2reg<q>")]
4740 )
4741
4742 (define_insn "aarch64_simd_ld2r<mode>"
4743 [(set (match_operand:OI 0 "register_operand" "=w")
4744 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4745 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4746 UNSPEC_LD2_DUP))]
4747 "TARGET_SIMD"
4748 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4749 [(set_attr "type" "neon_load2_all_lanes<q>")]
4750 )
4751
4752 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4753 [(set (match_operand:OI 0 "register_operand" "=w")
4754 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4755 (match_operand:OI 2 "register_operand" "0")
4756 (match_operand:SI 3 "immediate_operand" "i")
4757 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4758 UNSPEC_LD2_LANE))]
4759 "TARGET_SIMD"
4760 {
4761 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4762 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4763 }
4764 [(set_attr "type" "neon_load2_one_lane")]
4765 )
4766
4767 (define_expand "vec_load_lanesoi<mode>"
4768 [(set (match_operand:OI 0 "register_operand" "=w")
4769 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4770 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4771 UNSPEC_LD2))]
4772 "TARGET_SIMD"
4773 {
4774 if (BYTES_BIG_ENDIAN)
4775 {
4776 rtx tmp = gen_reg_rtx (OImode);
4777 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4778 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4779 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4780 }
4781 else
4782 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4783 DONE;
4784 })
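
;; A sketch of the big-endian path above: ld2 deinterleaves into
;; architectural lane order, which on big-endian is the reverse of
;; GCC's vector extension lane numbering, so the loaded pair is passed
;; through aarch64_rev_reglistoi with a reversal mask; little-endian
;; needs no fix-up.  The st2/ld3/st3/ld4/st4 expanders below mirror
;; this.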
4785
4786 (define_insn "aarch64_simd_st2<mode>"
4787 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4788 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4789 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4790 UNSPEC_ST2))]
4791 "TARGET_SIMD"
4792 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4793 [(set_attr "type" "neon_store2_2reg<q>")]
4794 )
4795
4796 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4797 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4798 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4799 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4800 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4801 (match_operand:SI 2 "immediate_operand" "i")]
4802 UNSPEC_ST2_LANE))]
4803 "TARGET_SIMD"
4804 {
4805 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4806 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4807 }
4808 [(set_attr "type" "neon_store2_one_lane<q>")]
4809 )
4810
4811 (define_expand "vec_store_lanesoi<mode>"
4812 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4813 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4814 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4815 UNSPEC_ST2))]
4816 "TARGET_SIMD"
4817 {
4818 if (BYTES_BIG_ENDIAN)
4819 {
4820 rtx tmp = gen_reg_rtx (OImode);
4821 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4822 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4823 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4824 }
4825 else
4826 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4827 DONE;
4828 })
4829
4830 (define_insn "aarch64_simd_ld3<mode>"
4831 [(set (match_operand:CI 0 "register_operand" "=w")
4832 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4833 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4834 UNSPEC_LD3))]
4835 "TARGET_SIMD"
4836 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4837 [(set_attr "type" "neon_load3_3reg<q>")]
4838 )
4839
4840 (define_insn "aarch64_simd_ld3r<mode>"
4841 [(set (match_operand:CI 0 "register_operand" "=w")
4842 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4843 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4844 UNSPEC_LD3_DUP))]
4845 "TARGET_SIMD"
4846 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4847 [(set_attr "type" "neon_load3_all_lanes<q>")]
4848 )
4849
4850 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4851 [(set (match_operand:CI 0 "register_operand" "=w")
4852 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4853 (match_operand:CI 2 "register_operand" "0")
4854 (match_operand:SI 3 "immediate_operand" "i")
4855 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4856 UNSPEC_LD3_LANE))]
4857 "TARGET_SIMD"
4858 {
4859 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4860 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4861 }
4862 [(set_attr "type" "neon_load3_one_lane")]
4863 )
4864
4865 (define_expand "vec_load_lanesci<mode>"
4866 [(set (match_operand:CI 0 "register_operand" "=w")
4867 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4868 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4869 UNSPEC_LD3))]
4870 "TARGET_SIMD"
4871 {
4872 if (BYTES_BIG_ENDIAN)
4873 {
4874 rtx tmp = gen_reg_rtx (CImode);
4875 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4876 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4877 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4878 }
4879 else
4880 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4881 DONE;
4882 })
4883
4884 (define_insn "aarch64_simd_st3<mode>"
4885 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4886 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4887 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4888 UNSPEC_ST3))]
4889 "TARGET_SIMD"
4890 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4891 [(set_attr "type" "neon_store3_3reg<q>")]
4892 )
4893
4894 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4895 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4896 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4897 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4898 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4899 (match_operand:SI 2 "immediate_operand" "i")]
4900 UNSPEC_ST3_LANE))]
4901 "TARGET_SIMD"
4902 {
4903 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4904 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4905 }
4906 [(set_attr "type" "neon_store3_one_lane<q>")]
4907 )
4908
4909 (define_expand "vec_store_lanesci<mode>"
4910 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4911 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4912 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4913 UNSPEC_ST3))]
4914 "TARGET_SIMD"
4915 {
4916 if (BYTES_BIG_ENDIAN)
4917 {
4918 rtx tmp = gen_reg_rtx (CImode);
4919 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4920 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4921 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4922 }
4923 else
4924 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4925 DONE;
4926 })
4927
4928 (define_insn "aarch64_simd_ld4<mode>"
4929 [(set (match_operand:XI 0 "register_operand" "=w")
4930 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4931 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4932 UNSPEC_LD4))]
4933 "TARGET_SIMD"
4934 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4935 [(set_attr "type" "neon_load4_4reg<q>")]
4936 )
4937
4938 (define_insn "aarch64_simd_ld4r<mode>"
4939 [(set (match_operand:XI 0 "register_operand" "=w")
4940 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4941 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4942 UNSPEC_LD4_DUP))]
4943 "TARGET_SIMD"
4944 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4945 [(set_attr "type" "neon_load4_all_lanes<q>")]
4946 )
4947
4948 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4949 [(set (match_operand:XI 0 "register_operand" "=w")
4950 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4951 (match_operand:XI 2 "register_operand" "0")
4952 (match_operand:SI 3 "immediate_operand" "i")
4953 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4954 UNSPEC_LD4_LANE))]
4955 "TARGET_SIMD"
4956 {
4957 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4958 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4959 }
4960 [(set_attr "type" "neon_load4_one_lane")]
4961 )
4962
4963 (define_expand "vec_load_lanesxi<mode>"
4964 [(set (match_operand:XI 0 "register_operand" "=w")
4965 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4966 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4967 UNSPEC_LD4))]
4968 "TARGET_SIMD"
4969 {
4970 if (BYTES_BIG_ENDIAN)
4971 {
4972 rtx tmp = gen_reg_rtx (XImode);
4973 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4974 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4975 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4976 }
4977 else
4978 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4979 DONE;
4980 })
4981
4982 (define_insn "aarch64_simd_st4<mode>"
4983 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4984 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4985 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4986 UNSPEC_ST4))]
4987 "TARGET_SIMD"
4988 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4989 [(set_attr "type" "neon_store4_4reg<q>")]
4990 )
4991
4992 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4993 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4994 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4995 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4996 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4997 (match_operand:SI 2 "immediate_operand" "i")]
4998 UNSPEC_ST4_LANE))]
4999 "TARGET_SIMD"
5000 {
5001 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5002 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5003 }
5004 [(set_attr "type" "neon_store4_one_lane<q>")]
5005 )
5006
5007 (define_expand "vec_store_lanesxi<mode>"
5008 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5009 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5010 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5011 UNSPEC_ST4))]
5012 "TARGET_SIMD"
5013 {
5014 if (BYTES_BIG_ENDIAN)
5015 {
5016 rtx tmp = gen_reg_rtx (XImode);
5017 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5018 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5019 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5020 }
5021 else
5022 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5023 DONE;
5024 })
5025
5026 (define_insn_and_split "aarch64_rev_reglist<mode>"
5027 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5028 (unspec:VSTRUCT
5029 [(match_operand:VSTRUCT 1 "register_operand" "w")
5030 (match_operand:V16QI 2 "register_operand" "w")]
5031 UNSPEC_REV_REGLIST))]
5032 "TARGET_SIMD"
5033 "#"
5034 "&& reload_completed"
5035 [(const_int 0)]
5036 {
5037 int i;
5038 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5039 for (i = 0; i < nregs; i++)
5040 {
5041 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5042 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5043 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5044 }
5045 DONE;
5046 }
5047 [(set_attr "type" "neon_tbl1_q")
5048 (set_attr "length" "<insn_count>")]
5049 )
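
;; As a sketch (register numbers are illustrative), reversing a CI-mode
;; list splits after reload into one tbl per 128-bit register:
;;   tbl v0.16b, {v4.16b}, v7.16b
;;   tbl v1.16b, {v5.16b}, v7.16b
;;   tbl v2.16b, {v6.16b}, v7.16b
;; where v7 holds the byte mask built by aarch64_reverse_mask; the
;; early-clobber "=&w" keeps the destination list clear of the sources.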
5050
5051 ;; Reload patterns for AdvSIMD register list operands.
5052
5053 (define_expand "mov<mode>"
5054 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5055 (match_operand:VSTRUCT 1 "general_operand" ""))]
5056 "TARGET_SIMD"
5057 {
5058 if (can_create_pseudo_p ())
5059 {
5060 if (GET_CODE (operands[0]) != REG)
5061 operands[1] = force_reg (<MODE>mode, operands[1]);
5062 }
5063
5064 /* If we have a paradoxical subreg trying to write to <MODE> from a
5065 narrower mode, and the registers don't overlap, then we need to break
5066 it apart. What it's trying to do is give two kinds of information at
5067 the same time. It's trying to convey liveness information by saying
5068 that the entire register will be written to eventually, but it also
5069 only wants to write a single part of the register. Hence the
5070 paradoxical subreg.
5071
5072 Instead of allowing this we split the two concerns. The liveness
5073 information is conveyed using a clobber, and the paradoxical subreg is
5074 broken apart into a normal write of just the part it wanted to write. */
5075
5076 if (REG_P (operands[0]) && paradoxical_subreg_p (operands[1]))
5077 {
5078 if (!reg_overlap_mentioned_p (operands[0], operands[1]))
5079 emit_clobber (operands[0]);
5080 operands[1] = SUBREG_REG (operands[1]);
5081 operands[0] = gen_lowpart (GET_MODE (operands[1]), operands[0]);
5082 }
5083 })
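
;; A minimal sketch of the rewrite above (register numbers are
;; illustrative): a move such as
;;   (set (reg:OI 32) (subreg:OI (reg:V2DI 36) 0))
;; with non-overlapping registers becomes
;;   (clobber (reg:OI 32))
;;   (set (subreg:V2DI (reg:OI 32) 0) (reg:V2DI 36))
;; so the liveness claim lives in the clobber and the copy is narrowed
;; to the part that was actually being written.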
5084
5085
5086 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5087 [(match_operand:CI 0 "register_operand" "=w")
5088 (match_operand:DI 1 "register_operand" "r")
5089 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5090 "TARGET_SIMD"
5091 {
5092 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5093 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5094 DONE;
5095 })
5096
5097 (define_insn "aarch64_ld1_x3_<mode>"
5098 [(set (match_operand:CI 0 "register_operand" "=w")
5099 (unspec:CI
5100 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5101 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5102 "TARGET_SIMD"
5103 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5104 [(set_attr "type" "neon_load1_3reg<q>")]
5105 )
5106
5107 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5108 [(match_operand:XI 0 "register_operand" "=w")
5109 (match_operand:DI 1 "register_operand" "r")
5110 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5111 "TARGET_SIMD"
5112 {
5113 rtx mem = gen_rtx_MEM (XImode, operands[1]);
5114 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5115 DONE;
5116 })
5117
5118 (define_insn "aarch64_ld1_x4_<mode>"
5119 [(set (match_operand:XI 0 "register_operand" "=w")
5120 (unspec:XI
5121 [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5122 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5123 UNSPEC_LD1))]
5124 "TARGET_SIMD"
5125 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5126 [(set_attr "type" "neon_load1_4reg<q>")]
5127 )
5128
5129 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5130 [(match_operand:DI 0 "register_operand" "")
5131 (match_operand:OI 1 "register_operand" "")
5132 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5133 "TARGET_SIMD"
5134 {
5135 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5136 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5137 DONE;
5138 })
5139
5140 (define_insn "aarch64_st1_x2_<mode>"
5141 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5142 (unspec:OI
5143 [(match_operand:OI 1 "register_operand" "w")
5144 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5145 "TARGET_SIMD"
5146 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5147 [(set_attr "type" "neon_store1_2reg<q>")]
5148 )
5149
5150 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5151 [(match_operand:DI 0 "register_operand" "")
5152 (match_operand:CI 1 "register_operand" "")
5153 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5154 "TARGET_SIMD"
5155 {
5156 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5157 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5158 DONE;
5159 })
5160
5161 (define_insn "aarch64_st1_x3_<mode>"
5162 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5163 (unspec:CI
5164 [(match_operand:CI 1 "register_operand" "w")
5165 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5166 "TARGET_SIMD"
5167 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5168 [(set_attr "type" "neon_store1_3reg<q>")]
5169 )
5170
5171 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5172 [(match_operand:DI 0 "register_operand" "")
5173 (match_operand:XI 1 "register_operand" "")
5174 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5175 "TARGET_SIMD"
5176 {
5177 rtx mem = gen_rtx_MEM (XImode, operands[0]);
5178 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5179 DONE;
5180 })
5181
5182 (define_insn "aarch64_st1_x4_<mode>"
5183 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5184 (unspec:XI
5185 [(match_operand:XI 1 "register_operand" "w")
5186 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5187 UNSPEC_ST1))]
5188 "TARGET_SIMD"
5189 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5190 [(set_attr "type" "neon_store1_4reg<q>")]
5191 )
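
;; A hedged usage sketch from the intrinsics side, assuming the
;; arm_neon.h entry points vld1q_u32_x4/vst1q_u32_x4 route through the
;; ld1x4/st1x4 expanders above (register choices are illustrative):
;;   #include <arm_neon.h>
;;   void copy_16_words (uint32_t *dst, const uint32_t *src)
;;   {
;;     uint32x4x4_t t = vld1q_u32_x4 (src);  /* one ld1 {v0.4s - v3.4s}, [x1] */
;;     vst1q_u32_x4 (dst, t);                /* one st1 {v0.4s - v3.4s}, [x0] */
;;   }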
5192
5193 (define_insn "*aarch64_mov<mode>"
5194 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5195 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5196 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5197 && (register_operand (operands[0], <MODE>mode)
5198 || register_operand (operands[1], <MODE>mode))"
5199 "@
5200 #
5201 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5202 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5203 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5204 neon_load<nregs>_<nregs>reg_q")
5205 (set_attr "length" "<insn_count>,4,4")]
5206 )
5207
5208 (define_insn "aarch64_be_ld1<mode>"
5209 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5210 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5211 "aarch64_simd_struct_operand" "Utv")]
5212 UNSPEC_LD1))]
5213 "TARGET_SIMD"
5214 "ld1\\t{%0<Vmtype>}, %1"
5215 [(set_attr "type" "neon_load1_1reg<q>")]
5216 )
5217
5218 (define_insn "aarch64_be_st1<mode>"
5219 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5220 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5221 UNSPEC_ST1))]
5222 "TARGET_SIMD"
5223 "st1\\t{%1<Vmtype>}, %0"
5224 [(set_attr "type" "neon_store1_1reg<q>")]
5225 )
5226
5227 (define_insn "*aarch64_be_movoi"
5228 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5229 (match_operand:OI 1 "general_operand" " w,w,m"))]
5230 "TARGET_SIMD && BYTES_BIG_ENDIAN
5231 && (register_operand (operands[0], OImode)
5232 || register_operand (operands[1], OImode))"
5233 "@
5234 #
5235 stp\\t%q1, %R1, %0
5236 ldp\\t%q0, %R0, %1"
5237 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5238 (set_attr "length" "8,4,4")]
5239 )
5240
5241 (define_insn "*aarch64_be_movci"
5242 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5243 (match_operand:CI 1 "general_operand" " w,w,o"))]
5244 "TARGET_SIMD && BYTES_BIG_ENDIAN
5245 && (register_operand (operands[0], CImode)
5246 || register_operand (operands[1], CImode))"
5247 "#"
5248 [(set_attr "type" "multiple")
5249 (set_attr "length" "12,4,4")]
5250 )
5251
5252 (define_insn "*aarch64_be_movxi"
5253 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5254 (match_operand:XI 1 "general_operand" " w,w,o"))]
5255 "TARGET_SIMD && BYTES_BIG_ENDIAN
5256 && (register_operand (operands[0], XImode)
5257 || register_operand (operands[1], XImode))"
5258 "#"
5259 [(set_attr "type" "multiple")
5260 (set_attr "length" "16,4,4")]
5261 )
5262
5263 (define_split
5264 [(set (match_operand:OI 0 "register_operand")
5265 (match_operand:OI 1 "register_operand"))]
5266 "TARGET_SIMD && reload_completed"
5267 [(const_int 0)]
5268 {
5269 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5270 DONE;
5271 })
5272
5273 (define_split
5274 [(set (match_operand:CI 0 "nonimmediate_operand")
5275 (match_operand:CI 1 "general_operand"))]
5276 "TARGET_SIMD && reload_completed"
5277 [(const_int 0)]
5278 {
5279 if (register_operand (operands[0], CImode)
5280 && register_operand (operands[1], CImode))
5281 {
5282 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5283 DONE;
5284 }
5285 else if (BYTES_BIG_ENDIAN)
5286 {
5287 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5288 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5289 emit_move_insn (gen_lowpart (V16QImode,
5290 simplify_gen_subreg (TImode, operands[0],
5291 CImode, 32)),
5292 gen_lowpart (V16QImode,
5293 simplify_gen_subreg (TImode, operands[1],
5294 CImode, 32)));
5295 DONE;
5296 }
5297 else
5298 FAIL;
5299 })
5300
5301 (define_split
5302 [(set (match_operand:XI 0 "nonimmediate_operand")
5303 (match_operand:XI 1 "general_operand"))]
5304 "TARGET_SIMD && reload_completed"
5305 [(const_int 0)]
5306 {
5307 if (register_operand (operands[0], XImode)
5308 && register_operand (operands[1], XImode))
5309 {
5310 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5311 DONE;
5312 }
5313 else if (BYTES_BIG_ENDIAN)
5314 {
5315 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5316 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5317 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5318 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5319 DONE;
5320 }
5321 else
5322 FAIL;
5323 })
5324
5325 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5326 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5327 (match_operand:DI 1 "register_operand" "r")
5328 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5329 "TARGET_SIMD"
5330 {
5331 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5332 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5333 * <VSTRUCT:nregs>);
5334
5335 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5336 mem));
5337 DONE;
5338 })
5339
5340 (define_insn "aarch64_ld2<mode>_dreg"
5341 [(set (match_operand:OI 0 "register_operand" "=w")
5342 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5343 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5344 UNSPEC_LD2_DREG))]
5345 "TARGET_SIMD"
5346 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5347 [(set_attr "type" "neon_load2_2reg<q>")]
5348 )
5349
5350 (define_insn "aarch64_ld2<mode>_dreg"
5351 [(set (match_operand:OI 0 "register_operand" "=w")
5352 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5353 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5354 UNSPEC_LD2_DREG))]
5355 "TARGET_SIMD"
5356 "ld1\\t{%S0.1d - %T0.1d}, %1"
5357 [(set_attr "type" "neon_load1_2reg<q>")]
5358 )
5359
5360 (define_insn "aarch64_ld3<mode>_dreg"
5361 [(set (match_operand:CI 0 "register_operand" "=w")
5362 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5363 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5364 UNSPEC_LD3_DREG))]
5365 "TARGET_SIMD"
5366 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5367 [(set_attr "type" "neon_load3_3reg<q>")]
5368 )
5369
5370 (define_insn "aarch64_ld3<mode>_dreg"
5371 [(set (match_operand:CI 0 "register_operand" "=w")
5372 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5373 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5374 UNSPEC_LD3_DREG))]
5375 "TARGET_SIMD"
5376 "ld1\\t{%S0.1d - %U0.1d}, %1"
5377 [(set_attr "type" "neon_load1_3reg<q>")]
5378 )
5379
5380 (define_insn "aarch64_ld4<mode>_dreg"
5381 [(set (match_operand:XI 0 "register_operand" "=w")
5382 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5383 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5384 UNSPEC_LD4_DREG))]
5385 "TARGET_SIMD"
5386 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5387 [(set_attr "type" "neon_load4_4reg<q>")]
5388 )
5389
5390 (define_insn "aarch64_ld4<mode>_dreg"
5391 [(set (match_operand:XI 0 "register_operand" "=w")
5392 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5393 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5394 UNSPEC_LD4_DREG))]
5395 "TARGET_SIMD"
5396 "ld1\\t{%S0.1d - %V0.1d}, %1"
5397 [(set_attr "type" "neon_load1_4reg<q>")]
5398 )
5399
5400 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5401 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5402 (match_operand:DI 1 "register_operand" "r")
5403 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5404 "TARGET_SIMD"
5405 {
5406 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5407 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5408
5409 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5410 DONE;
5411 })
5412
5413 (define_expand "aarch64_ld1<VALL_F16:mode>"
5414 [(match_operand:VALL_F16 0 "register_operand")
5415 (match_operand:DI 1 "register_operand")]
5416 "TARGET_SIMD"
5417 {
5418 machine_mode mode = <VALL_F16:MODE>mode;
5419 rtx mem = gen_rtx_MEM (mode, operands[1]);
5420
5421 if (BYTES_BIG_ENDIAN)
5422 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5423 else
5424 emit_move_insn (operands[0], mem);
5425 DONE;
5426 })
5427
5428 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5429 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5430 (match_operand:DI 1 "register_operand" "r")
5431 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5432 "TARGET_SIMD"
5433 {
5434 machine_mode mode = <VSTRUCT:MODE>mode;
5435 rtx mem = gen_rtx_MEM (mode, operands[1]);
5436
5437 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5438 DONE;
5439 })
5440
5441 (define_expand "aarch64_ld1x2<VQ:mode>"
5442 [(match_operand:OI 0 "register_operand" "=w")
5443 (match_operand:DI 1 "register_operand" "r")
5444 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5445 "TARGET_SIMD"
5446 {
5447 machine_mode mode = OImode;
5448 rtx mem = gen_rtx_MEM (mode, operands[1]);
5449
5450 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5451 DONE;
5452 })
5453
5454 (define_expand "aarch64_ld1x2<VDC:mode>"
5455 [(match_operand:OI 0 "register_operand" "=w")
5456 (match_operand:DI 1 "register_operand" "r")
5457 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5458 "TARGET_SIMD"
5459 {
5460 machine_mode mode = OImode;
5461 rtx mem = gen_rtx_MEM (mode, operands[1]);
5462
5463 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5464 DONE;
5465 })
5466
5467
5468 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5469 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5470 (match_operand:DI 1 "register_operand" "r")
5471 (match_operand:VSTRUCT 2 "register_operand" "0")
5472 (match_operand:SI 3 "immediate_operand" "i")
5473 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5474 "TARGET_SIMD"
5475 {
5476 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5477 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5478 * <VSTRUCT:nregs>);
5479
5480 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5481 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5482 operands[0], mem, operands[2], operands[3]));
5483 DONE;
5484 })
5485
5486 ;; Expanders for builtins to extract vector registers from large
5487 ;; opaque integer modes.
5488
5489 ;; D-register list.
5490
5491 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5492 [(match_operand:VDC 0 "register_operand" "=w")
5493 (match_operand:VSTRUCT 1 "register_operand" "w")
5494 (match_operand:SI 2 "immediate_operand" "i")]
5495 "TARGET_SIMD"
5496 {
5497 int part = INTVAL (operands[2]);
5498 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5499 int offset = part * 16;
5500
5501 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5502 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5503 DONE;
5504 })
5505
5506 ;; Q-register list.
5507
5508 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5509 [(match_operand:VQ 0 "register_operand" "=w")
5510 (match_operand:VSTRUCT 1 "register_operand" "w")
5511 (match_operand:SI 2 "immediate_operand" "i")]
5512 "TARGET_SIMD"
5513 {
5514 int part = INTVAL (operands[2]);
5515 int offset = part * 16;
5516
5517 emit_move_insn (operands[0],
5518 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5519 DONE;
5520 })
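
;; Offset sketch for the getters above: element 2 of an int32x4x3_t
;; held in CImode is read at byte offset 2 * 16 = 32, i.e. through
;; (subreg:V4SI (reg:CI ...) 32), which after register allocation is
;; just the third quad register of the list.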
5521
5522 ;; Permuted-store expanders for neon intrinsics.
5523
5524 ;; Permute instructions
5525
5526 ;; vec_perm support
5527
5528 (define_expand "vec_perm<mode>"
5529 [(match_operand:VB 0 "register_operand")
5530 (match_operand:VB 1 "register_operand")
5531 (match_operand:VB 2 "register_operand")
5532 (match_operand:VB 3 "register_operand")]
5533 "TARGET_SIMD"
5534 {
5535 aarch64_expand_vec_perm (operands[0], operands[1],
5536 operands[2], operands[3], <nunits>);
5537 DONE;
5538 })
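
;; Usage sketch: a variable two-input byte shuffle written with the
;; GCC vector extension, e.g.
;;   uint8x16_t f (uint8x16_t a, uint8x16_t b, uint8x16_t sel)
;;   { return __builtin_shuffle (a, b, sel); }
;; reaches this expander; aarch64_expand_vec_perm is then expected to
;; concatenate a and b (aarch64_combinev16qi below) and index the pair
;; with a two-register tbl (aarch64_tbl2v16qi below).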
5539
5540 (define_insn "aarch64_tbl1<mode>"
5541 [(set (match_operand:VB 0 "register_operand" "=w")
5542 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5543 (match_operand:VB 2 "register_operand" "w")]
5544 UNSPEC_TBL))]
5545 "TARGET_SIMD"
5546 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5547 [(set_attr "type" "neon_tbl1<q>")]
5548 )
5549
5550 ;; Two source registers.
5551
5552 (define_insn "aarch64_tbl2v16qi"
5553 [(set (match_operand:V16QI 0 "register_operand" "=w")
5554 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5555 (match_operand:V16QI 2 "register_operand" "w")]
5556 UNSPEC_TBL))]
5557 "TARGET_SIMD"
5558 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5559 [(set_attr "type" "neon_tbl2_q")]
5560 )
5561
5562 (define_insn "aarch64_tbl3<mode>"
5563 [(set (match_operand:VB 0 "register_operand" "=w")
5564 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5565 (match_operand:VB 2 "register_operand" "w")]
5566 UNSPEC_TBL))]
5567 "TARGET_SIMD"
5568 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5569 [(set_attr "type" "neon_tbl3")]
5570 )
5571
5572 (define_insn "aarch64_tbx4<mode>"
5573 [(set (match_operand:VB 0 "register_operand" "=w")
5574 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5575 (match_operand:OI 2 "register_operand" "w")
5576 (match_operand:VB 3 "register_operand" "w")]
5577 UNSPEC_TBX))]
5578 "TARGET_SIMD"
5579 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5580 [(set_attr "type" "neon_tbl4")]
5581 )
5582
5583 ;; Three source registers.
5584
5585 (define_insn "aarch64_qtbl3<mode>"
5586 [(set (match_operand:VB 0 "register_operand" "=w")
5587 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5588 (match_operand:VB 2 "register_operand" "w")]
5589 UNSPEC_TBL))]
5590 "TARGET_SIMD"
5591 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5592 [(set_attr "type" "neon_tbl3")]
5593 )
5594
5595 (define_insn "aarch64_qtbx3<mode>"
5596 [(set (match_operand:VB 0 "register_operand" "=w")
5597 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5598 (match_operand:CI 2 "register_operand" "w")
5599 (match_operand:VB 3 "register_operand" "w")]
5600 UNSPEC_TBX))]
5601 "TARGET_SIMD"
5602 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5603 [(set_attr "type" "neon_tbl3")]
5604 )
5605
5606 ;; Four source registers.
5607
5608 (define_insn "aarch64_qtbl4<mode>"
5609 [(set (match_operand:VB 0 "register_operand" "=w")
5610 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5611 (match_operand:VB 2 "register_operand" "w")]
5612 UNSPEC_TBL))]
5613 "TARGET_SIMD"
5614 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5615 [(set_attr "type" "neon_tbl4")]
5616 )
5617
5618 (define_insn "aarch64_qtbx4<mode>"
5619 [(set (match_operand:VB 0 "register_operand" "=w")
5620 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5621 (match_operand:XI 2 "register_operand" "w")
5622 (match_operand:VB 3 "register_operand" "w")]
5623 UNSPEC_TBX))]
5624 "TARGET_SIMD"
5625 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5626 [(set_attr "type" "neon_tbl4")]
5627 )
5628
5629 (define_insn_and_split "aarch64_combinev16qi"
5630 [(set (match_operand:OI 0 "register_operand" "=w")
5631 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5632 (match_operand:V16QI 2 "register_operand" "w")]
5633 UNSPEC_CONCAT))]
5634 "TARGET_SIMD"
5635 "#"
5636 "&& reload_completed"
5637 [(const_int 0)]
5638 {
5639 aarch64_split_combinev16qi (operands);
5640 DONE;
5641 }
5642 [(set_attr "type" "multiple")]
5643 )
5644
5645 ;; This instruction's pattern is generated directly by
5646 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5647 ;; need corresponding changes there.
5648 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5649 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5650 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5651 (match_operand:VALL_F16 2 "register_operand" "w")]
5652 PERMUTE))]
5653 "TARGET_SIMD"
5654 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5655 [(set_attr "type" "neon_permute<q>")]
5656 )
5657
5658 ;; This instruction's pattern is generated directly by
5659 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5660 ;; need corresponding changes there. Note that the immediate (third)
5661 ;; operand is a lane index not a byte index.
5662 (define_insn "aarch64_ext<mode>"
5663 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5664 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5665 (match_operand:VALL_F16 2 "register_operand" "w")
5666 (match_operand:SI 3 "immediate_operand" "i")]
5667 UNSPEC_EXT))]
5668 "TARGET_SIMD"
5669 {
5670 operands[3] = GEN_INT (INTVAL (operands[3])
5671 * GET_MODE_UNIT_SIZE (<MODE>mode));
5672 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5673 }
5674 [(set_attr "type" "neon_ext<q>")]
5675 )
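
;; Worked example of the scaling above: for V4SImode the unit size is
;; 4 bytes, so lane index 1 becomes "#4" and the output is
;;   ext v0.16b, v1.16b, v2.16b, #4
;; i.e. lanes 1-3 of operand 1 followed by lane 0 of operand 2 (in GCC
;; vector extension index order).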
5676
5677 ;; This instruction's pattern is generated directly by
5678 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5679 ;; need corresponding changes there.
5680 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5681 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5682 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5683 REVERSE))]
5684 "TARGET_SIMD"
5685 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5686 [(set_attr "type" "neon_rev<q>")]
5687 )
5688
5689 (define_insn "aarch64_st2<mode>_dreg"
5690 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5691 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5692 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5693 UNSPEC_ST2))]
5694 "TARGET_SIMD"
5695 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5696 [(set_attr "type" "neon_store2_2reg")]
5697 )
5698
5699 (define_insn "aarch64_st2<mode>_dreg"
5700 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5701 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5702 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5703 UNSPEC_ST2))]
5704 "TARGET_SIMD"
5705 "st1\\t{%S1.1d - %T1.1d}, %0"
5706 [(set_attr "type" "neon_store1_2reg")]
5707 )
5708
5709 (define_insn "aarch64_st3<mode>_dreg"
5710 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5711 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5712 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5713 UNSPEC_ST3))]
5714 "TARGET_SIMD"
5715 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5716 [(set_attr "type" "neon_store3_3reg")]
5717 )
5718
5719 (define_insn "aarch64_st3<mode>_dreg"
5720 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5721 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5722 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5723 UNSPEC_ST3))]
5724 "TARGET_SIMD"
5725 "st1\\t{%S1.1d - %U1.1d}, %0"
5726 [(set_attr "type" "neon_store1_3reg")]
5727 )
5728
5729 (define_insn "aarch64_st4<mode>_dreg"
5730 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5731 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5732 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5733 UNSPEC_ST4))]
5734 "TARGET_SIMD"
5735 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5736 [(set_attr "type" "neon_store4_4reg")]
5737 )
5738
5739 (define_insn "aarch64_st4<mode>_dreg"
5740 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5741 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5742 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5743 UNSPEC_ST4))]
5744 "TARGET_SIMD"
5745 "st1\\t{%S1.1d - %V1.1d}, %0"
5746 [(set_attr "type" "neon_store1_4reg")]
5747 )
5748
5749 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5750 [(match_operand:DI 0 "register_operand" "r")
5751 (match_operand:VSTRUCT 1 "register_operand" "w")
5752 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5753 "TARGET_SIMD"
5754 {
5755 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5756 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5757
5758 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5759 DONE;
5760 })
5761
5762 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5763 [(match_operand:DI 0 "register_operand" "r")
5764 (match_operand:VSTRUCT 1 "register_operand" "w")
5765 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5766 "TARGET_SIMD"
5767 {
5768 machine_mode mode = <VSTRUCT:MODE>mode;
5769 rtx mem = gen_rtx_MEM (mode, operands[0]);
5770
5771 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5772 DONE;
5773 })
5774
5775 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5776 [(match_operand:DI 0 "register_operand" "r")
5777 (match_operand:VSTRUCT 1 "register_operand" "w")
5778 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5779 (match_operand:SI 2 "immediate_operand")]
5780 "TARGET_SIMD"
5781 {
5782 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5783 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5784 * <VSTRUCT:nregs>);
5785
5786 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5787 mem, operands[1], operands[2]));
5788 DONE;
5789 })
5790
5791 (define_expand "aarch64_st1<VALL_F16:mode>"
5792 [(match_operand:DI 0 "register_operand")
5793 (match_operand:VALL_F16 1 "register_operand")]
5794 "TARGET_SIMD"
5795 {
5796 machine_mode mode = <VALL_F16:MODE>mode;
5797 rtx mem = gen_rtx_MEM (mode, operands[0]);
5798
5799 if (BYTES_BIG_ENDIAN)
5800 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5801 else
5802 emit_move_insn (mem, operands[1]);
5803 DONE;
5804 })
5805
5806 ;; Expander for builtins to insert vector registers into large
5807 ;; opaque integer modes.
5808
5809 ;; Q-register list. We don't need a D-reg inserter, as we zero-extend
5810 ;; D-regs in arm_neon.h and insert the resulting Q-regs.
5811
5812 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5813 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5814 (match_operand:VSTRUCT 1 "register_operand" "0")
5815 (match_operand:VQ 2 "register_operand" "w")
5816 (match_operand:SI 3 "immediate_operand" "i")]
5817 "TARGET_SIMD"
5818 {
5819 int part = INTVAL (operands[3]);
5820 int offset = part * 16;
5821
5822 emit_move_insn (operands[0], operands[1]);
5823 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5824 operands[2]);
5825 DONE;
5826 })
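
;; Offset sketch, mirroring the getters earlier: inserting a V4SI value
;; as element 1 of an XImode list copies the whole list first, then
;; writes (subreg:V4SI (reg:XI ...) 16), leaving the other three quad
;; registers untouched.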
5827
5828 ;; Standard pattern name vec_init<mode><Vel>.
5829
5830 (define_expand "vec_init<mode><Vel>"
5831 [(match_operand:VALL_F16 0 "register_operand" "")
5832 (match_operand 1 "" "")]
5833 "TARGET_SIMD"
5834 {
5835 aarch64_expand_vector_init (operands[0], operands[1]);
5836 DONE;
5837 })
5838
5839 (define_insn "*aarch64_simd_ld1r<mode>"
5840 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5841 (vec_duplicate:VALL_F16
5842 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5843 "TARGET_SIMD"
5844 "ld1r\\t{%0.<Vtype>}, %1"
5845 [(set_attr "type" "neon_load1_all_lanes")]
5846 )
5847
5848 (define_insn "aarch64_simd_ld1<mode>_x2"
5849 [(set (match_operand:OI 0 "register_operand" "=w")
5850 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5851 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5852 UNSPEC_LD1))]
5853 "TARGET_SIMD"
5854 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5855 [(set_attr "type" "neon_load1_2reg<q>")]
5856 )
5857
5858 (define_insn "aarch64_simd_ld1<mode>_x2"
5859 [(set (match_operand:OI 0 "register_operand" "=w")
5860 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5861 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5862 UNSPEC_LD1))]
5863 "TARGET_SIMD"
5864 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5865 [(set_attr "type" "neon_load1_2reg<q>")]
5866 )
5867
5868
5869 (define_insn "aarch64_frecpe<mode>"
5870 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5871 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5872 UNSPEC_FRECPE))]
5873 "TARGET_SIMD"
5874 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5875 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5876 )
5877
5878 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5879 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5880 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5881 FRECP))]
5882 "TARGET_SIMD"
5883 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5884 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5885 )
5886
5887 (define_insn "aarch64_frecps<mode>"
5888 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5889 (unspec:VHSDF_HSDF
5890 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5891 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5892 UNSPEC_FRECPS))]
5893 "TARGET_SIMD"
5894 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5895 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5896 )
5897
5898 (define_insn "aarch64_urecpe<mode>"
5899 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5900 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5901 UNSPEC_URECPE))]
5902 "TARGET_SIMD"
5903 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5904 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5905
5906 ;; Standard pattern name vec_extract<mode><Vel>.
5907
5908 (define_expand "vec_extract<mode><Vel>"
5909 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5910 (match_operand:VALL_F16 1 "register_operand" "")
5911 (match_operand:SI 2 "immediate_operand" "")]
5912 "TARGET_SIMD"
5913 {
5914 emit_insn
5915 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5916 DONE;
5917 })
5918
5919 ;; aes
5920
5921 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5922 [(set (match_operand:V16QI 0 "register_operand" "=w")
5923 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5924 (match_operand:V16QI 2 "register_operand" "w")]
5925 CRYPTO_AES))]
5926 "TARGET_SIMD && TARGET_AES"
5927 "aes<aes_op>\\t%0.16b, %2.16b"
5928 [(set_attr "type" "crypto_aese")]
5929 )
5930
5931 ;; When AES/AESMC fusion is enabled we want the register allocation to
5932 ;; look like:
5933 ;; AESE Vn, _
5934 ;; AESMC Vn, Vn
5935 ;; So prefer to tie operand 1 to operand 0 when fusing.
5936
5937 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5938 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5939 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5940 CRYPTO_AESMC))]
5941 "TARGET_SIMD && TARGET_AES"
5942 "aes<aesmc_op>\\t%0.16b, %1.16b"
5943 [(set_attr "type" "crypto_aesmc")
5944 (set_attr_alternative "enabled"
5945 [(if_then_else (match_test
5946 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5947 (const_string "yes" )
5948 (const_string "no"))
5949 (const_string "yes")])]
5950 )
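
;; A hedged source-level sketch of the pair the fusion targets
;; (register choices are illustrative):
;;   #include <arm_neon.h>
;;   uint8x16_t round (uint8x16_t st, uint8x16_t key)
;;   {
;;     st = vaeseq_u8 (st, key);  /* aese  v0.16b, v1.16b */
;;     return vaesmcq_u8 (st);    /* aesmc v0.16b, v0.16b */
;;   }
;; When AARCH64_FUSE_AES_AESMC is enabled the tied "0" alternative
;; becomes available and, being listed first, is preferred, so AESMC
;; reuses AESE's output register and the pair can fuse.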
5951
5952 ;; sha1
5953
5954 (define_insn "aarch64_crypto_sha1hsi"
5955 [(set (match_operand:SI 0 "register_operand" "=w")
5956 (unspec:SI [(match_operand:SI 1
5957 "register_operand" "w")]
5958 UNSPEC_SHA1H))]
5959 "TARGET_SIMD && TARGET_SHA2"
5960 "sha1h\\t%s0, %s1"
5961 [(set_attr "type" "crypto_sha1_fast")]
5962 )
5963
5964 (define_insn "aarch64_crypto_sha1hv4si"
5965 [(set (match_operand:SI 0 "register_operand" "=w")
5966 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5967 (parallel [(const_int 0)]))]
5968 UNSPEC_SHA1H))]
5969 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5970 "sha1h\\t%s0, %s1"
5971 [(set_attr "type" "crypto_sha1_fast")]
5972 )
5973
5974 (define_insn "aarch64_be_crypto_sha1hv4si"
5975 [(set (match_operand:SI 0 "register_operand" "=w")
5976 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5977 (parallel [(const_int 3)]))]
5978 UNSPEC_SHA1H))]
5979 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5980 "sha1h\\t%s0, %s1"
5981 [(set_attr "type" "crypto_sha1_fast")]
5982 )
5983
5984 (define_insn "aarch64_crypto_sha1su1v4si"
5985 [(set (match_operand:V4SI 0 "register_operand" "=w")
5986 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5987 (match_operand:V4SI 2 "register_operand" "w")]
5988 UNSPEC_SHA1SU1))]
5989 "TARGET_SIMD && TARGET_SHA2"
5990 "sha1su1\\t%0.4s, %2.4s"
5991 [(set_attr "type" "crypto_sha1_fast")]
5992 )
5993
5994 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5995 [(set (match_operand:V4SI 0 "register_operand" "=w")
5996 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5997 (match_operand:SI 2 "register_operand" "w")
5998 (match_operand:V4SI 3 "register_operand" "w")]
5999 CRYPTO_SHA1))]
6000 "TARGET_SIMD && TARGET_SHA2"
6001 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6002 [(set_attr "type" "crypto_sha1_slow")]
6003 )
6004
6005 (define_insn "aarch64_crypto_sha1su0v4si"
6006 [(set (match_operand:V4SI 0 "register_operand" "=w")
6007 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6008 (match_operand:V4SI 2 "register_operand" "w")
6009 (match_operand:V4SI 3 "register_operand" "w")]
6010 UNSPEC_SHA1SU0))]
6011 "TARGET_SIMD && TARGET_SHA2"
6012 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6013 [(set_attr "type" "crypto_sha1_xor")]
6014 )
6015
6016 ;; sha256
6017
6018 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6019 [(set (match_operand:V4SI 0 "register_operand" "=w")
6020 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6021 (match_operand:V4SI 2 "register_operand" "w")
6022 (match_operand:V4SI 3 "register_operand" "w")]
6023 CRYPTO_SHA256))]
6024 "TARGET_SIMD && TARGET_SHA2"
6025 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6026 [(set_attr "type" "crypto_sha256_slow")]
6027 )
6028
6029 (define_insn "aarch64_crypto_sha256su0v4si"
6030 [(set (match_operand:V4SI 0 "register_operand" "=w")
6031 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6032 (match_operand:V4SI 2 "register_operand" "w")]
6033 UNSPEC_SHA256SU0))]
6034 "TARGET_SIMD && TARGET_SHA2"
6035 "sha256su0\\t%0.4s, %2.4s"
6036 [(set_attr "type" "crypto_sha256_fast")]
6037 )
6038
6039 (define_insn "aarch64_crypto_sha256su1v4si"
6040 [(set (match_operand:V4SI 0 "register_operand" "=w")
6041 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6042 (match_operand:V4SI 2 "register_operand" "w")
6043 (match_operand:V4SI 3 "register_operand" "w")]
6044 UNSPEC_SHA256SU1))]
6045 "TARGET_SIMD && TARGET_SHA2"
6046 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6047 [(set_attr "type" "crypto_sha256_slow")]
6048 )
6049
6050 ;; sha512
6051
6052 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6053 [(set (match_operand:V2DI 0 "register_operand" "=w")
6054 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6055 (match_operand:V2DI 2 "register_operand" "w")
6056 (match_operand:V2DI 3 "register_operand" "w")]
6057 CRYPTO_SHA512))]
6058 "TARGET_SIMD && TARGET_SHA3"
6059 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6060 [(set_attr "type" "crypto_sha512")]
6061 )
6062
6063 (define_insn "aarch64_crypto_sha512su0qv2di"
6064 [(set (match_operand:V2DI 0 "register_operand" "=w")
6065 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6066 (match_operand:V2DI 2 "register_operand" "w")]
6067 UNSPEC_SHA512SU0))]
6068 "TARGET_SIMD && TARGET_SHA3"
6069 "sha512su0\\t%0.2d, %2.2d"
6070 [(set_attr "type" "crypto_sha512")]
6071 )
6072
6073 (define_insn "aarch64_crypto_sha512su1qv2di"
6074 [(set (match_operand:V2DI 0 "register_operand" "=w")
6075 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6076 (match_operand:V2DI 2 "register_operand" "w")
6077 (match_operand:V2DI 3 "register_operand" "w")]
6078 UNSPEC_SHA512SU1))]
6079 "TARGET_SIMD && TARGET_SHA3"
6080 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6081 [(set_attr "type" "crypto_sha512")]
6082 )
6083
6084 ;; sha3
6085
6086 (define_insn "aarch64_eor3qv8hi"
6087 [(set (match_operand:V8HI 0 "register_operand" "=w")
6088 (xor:V8HI
6089 (xor:V8HI
6090 (match_operand:V8HI 2 "register_operand" "%w")
6091 (match_operand:V8HI 3 "register_operand" "w"))
6092 (match_operand:V8HI 1 "register_operand" "w")))]
6093 "TARGET_SIMD && TARGET_SHA3"
6094 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6095 [(set_attr "type" "crypto_sha3")]
6096 )
6097
6098 (define_insn "aarch64_rax1qv2di"
6099 [(set (match_operand:V2DI 0 "register_operand" "=w")
6100 (xor:V2DI
6101 (rotate:V2DI
6102 (match_operand:V2DI 2 "register_operand" "w")
6103 (const_int 1))
6104 (match_operand:V2DI 1 "register_operand" "w")))]
6105 "TARGET_SIMD && TARGET_SHA3"
6106 "rax1\\t%0.2d, %1.2d, %2.2d"
6107 [(set_attr "type" "crypto_sha3")]
6108 )
6109
6110 (define_insn "aarch64_xarqv2di"
6111 [(set (match_operand:V2DI 0 "register_operand" "=w")
6112 (rotatert:V2DI
6113 (xor:V2DI
6114 (match_operand:V2DI 1 "register_operand" "%w")
6115 (match_operand:V2DI 2 "register_operand" "w"))
6116 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6117 "TARGET_SIMD && TARGET_SHA3"
6118 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6119 [(set_attr "type" "crypto_sha3")]
6120 )
6121
6122 (define_insn "aarch64_bcaxqv8hi"
6123 [(set (match_operand:V8HI 0 "register_operand" "=w")
6124 (xor:V8HI
6125 (and:V8HI
6126 (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
6127 (match_operand:V8HI 2 "register_operand" "w"))
6128 (match_operand:V8HI 1 "register_operand" "w")))]
6129 "TARGET_SIMD && TARGET_SHA3"
6130 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6131 [(set_attr "type" "crypto_sha3")]
6132 )
6133
6134 ;; SM3
6135
6136 (define_insn "aarch64_sm3ss1qv4si"
6137 [(set (match_operand:V4SI 0 "register_operand" "=w")
6138 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6139 (match_operand:V4SI 2 "register_operand" "w")
6140 (match_operand:V4SI 3 "register_operand" "w")]
6141 UNSPEC_SM3SS1))]
6142 "TARGET_SIMD && TARGET_SM4"
6143 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6144 [(set_attr "type" "crypto_sm3")]
6145 )
6146
6147
6148 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6149 [(set (match_operand:V4SI 0 "register_operand" "=w")
6150 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6151 (match_operand:V4SI 2 "register_operand" "w")
6152 (match_operand:V4SI 3 "register_operand" "w")
6153 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6154 CRYPTO_SM3TT))]
6155 "TARGET_SIMD && TARGET_SM4"
6156 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6157 [(set_attr "type" "crypto_sm3")]
6158 )
6159
6160 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6161 [(set (match_operand:V4SI 0 "register_operand" "=w")
6162 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6163 (match_operand:V4SI 2 "register_operand" "w")
6164 (match_operand:V4SI 3 "register_operand" "w")]
6165 CRYPTO_SM3PART))]
6166 "TARGET_SIMD && TARGET_SM4"
6167 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6168 [(set_attr "type" "crypto_sm3")]
6169 )
6170
6171 ;; SM4
6172
6173 (define_insn "aarch64_sm4eqv4si"
6174 [(set (match_operand:V4SI 0 "register_operand" "=w")
6175 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6176 (match_operand:V4SI 2 "register_operand" "w")]
6177 UNSPEC_SM4E))]
6178 "TARGET_SIMD && TARGET_SM4"
6179 "sm4e\\t%0.4s, %2.4s"
6180 [(set_attr "type" "crypto_sm4")]
6181 )
6182
6183 (define_insn "aarch64_sm4ekeyqv4si"
6184 [(set (match_operand:V4SI 0 "register_operand" "=w")
6185 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6186 (match_operand:V4SI 2 "register_operand" "w")]
6187 UNSPEC_SM4EKEY))]
6188 "TARGET_SIMD && TARGET_SM4"
6189 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6190 [(set_attr "type" "crypto_sm4")]
6191 )
6192
6193 ;; fp16fml
6194
6195 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6196 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6197 (unspec:VDQSF
6198 [(match_operand:VDQSF 1 "register_operand" "0")
6199 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6200 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6201 VFMLA16_LOW))]
6202 "TARGET_F16FML"
6203 {
6204 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6205 <nunits> * 2, false);
6206 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6207 <nunits> * 2, false);
6208
6209 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6210 operands[1],
6211 operands[2],
6212 operands[3],
6213 p1, p2));
6214 DONE;
6215
6216 })
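
;; Semantics sketch of the low-half split above, for the 64-bit fmlal
;; variant (little-endian lane numbering):
;;   fmlal v0.2s, v1.2h, v2.2h
;; computes, for each lane i in {0, 1},
;;   s0[i] += (float) h1[i] * (float) h2[i]
;; i.e. the low half-precision lanes selected by p1/p2 are widened to
;; single precision and fused into the accumulator.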
6217
6218 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6219 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6220 (unspec:VDQSF
6221 [(match_operand:VDQSF 1 "register_operand" "0")
6222 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6223 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6224 VFMLA16_HIGH))]
6225 "TARGET_F16FML"
6226 {
6227 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6228 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6229
6230 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6231 operands[1],
6232 operands[2],
6233 operands[3],
6234 p1, p2));
6235 DONE;
6236 })
6237
6238 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6239 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6240 (fma:VDQSF
6241 (float_extend:VDQSF
6242 (vec_select:<VFMLA_SEL_W>
6243 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6244 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6245 (float_extend:VDQSF
6246 (vec_select:<VFMLA_SEL_W>
6247 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6248 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6249 (match_operand:VDQSF 1 "register_operand" "0")))]
6250 "TARGET_F16FML"
6251 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6252 [(set_attr "type" "neon_fp_mul_s")]
6253 )
6254
6255 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6256 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6257 (fma:VDQSF
6258 (float_extend:VDQSF
6259 (neg:<VFMLA_SEL_W>
6260 (vec_select:<VFMLA_SEL_W>
6261 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6262 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6263 (float_extend:VDQSF
6264 (vec_select:<VFMLA_SEL_W>
6265 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6266 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6267 (match_operand:VDQSF 1 "register_operand" "0")))]
6268 "TARGET_F16FML"
6269 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6270 [(set_attr "type" "neon_fp_mul_s")]
6271 )
6272
6273 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6274 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6275 (fma:VDQSF
6276 (float_extend:VDQSF
6277 (vec_select:<VFMLA_SEL_W>
6278 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6279 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6280 (float_extend:VDQSF
6281 (vec_select:<VFMLA_SEL_W>
6282 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6283 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6284 (match_operand:VDQSF 1 "register_operand" "0")))]
6285 "TARGET_F16FML"
6286 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6287 [(set_attr "type" "neon_fp_mul_s")]
6288 )
6289
6290 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6291 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6292 (fma:VDQSF
6293 (float_extend:VDQSF
6294 (neg:<VFMLA_SEL_W>
6295 (vec_select:<VFMLA_SEL_W>
6296 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6297 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6298 (float_extend:VDQSF
6299 (vec_select:<VFMLA_SEL_W>
6300 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6301 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6302 (match_operand:VDQSF 1 "register_operand" "0")))]
6303 "TARGET_F16FML"
6304 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6305 [(set_attr "type" "neon_fp_mul_s")]
6306 )
6307
6308 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6309 [(set (match_operand:V2SF 0 "register_operand" "")
6310 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6311 (match_operand:V4HF 2 "register_operand" "")
6312 (match_operand:V4HF 3 "register_operand" "")
6313 (match_operand:SI 4 "aarch64_imm2" "")]
6314 VFMLA16_LOW))]
6315 "TARGET_F16FML"
6316 {
6317 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6318 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6319
6320 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6321 operands[1],
6322 operands[2],
6323 operands[3],
6324 p1, lane));
6325 DONE;
6326 }
6327 )
6328
6329 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6330 [(set (match_operand:V2SF 0 "register_operand" "")
6331 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6332 (match_operand:V4HF 2 "register_operand" "")
6333 (match_operand:V4HF 3 "register_operand" "")
6334 (match_operand:SI 4 "aarch64_imm2" "")]
6335 VFMLA16_HIGH))]
6336 "TARGET_F16FML"
6337 {
6338 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6339 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6340
6341 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6342 operands[1],
6343 operands[2],
6344 operands[3],
6345 p1, lane));
6346 DONE;
6347 })
6348
6349 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6350 [(set (match_operand:V2SF 0 "register_operand" "=w")
6351 (fma:V2SF
6352 (float_extend:V2SF
6353 (vec_select:V2HF
6354 (match_operand:V4HF 2 "register_operand" "w")
6355 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6356 (float_extend:V2SF
6357 (vec_duplicate:V2HF
6358 (vec_select:HF
6359 (match_operand:V4HF 3 "register_operand" "x")
6360 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6361 (match_operand:V2SF 1 "register_operand" "0")))]
6362 "TARGET_F16FML"
6363 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6364 [(set_attr "type" "neon_fp_mul_s")]
6365 )
6366
6367 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6368 [(set (match_operand:V2SF 0 "register_operand" "=w")
6369 (fma:V2SF
6370 (float_extend:V2SF
6371 (neg:V2HF
6372 (vec_select:V2HF
6373 (match_operand:V4HF 2 "register_operand" "w")
6374 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6375 (float_extend:V2SF
6376 (vec_duplicate:V2HF
6377 (vec_select:HF
6378 (match_operand:V4HF 3 "register_operand" "x")
6379 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6380 (match_operand:V2SF 1 "register_operand" "0")))]
6381 "TARGET_F16FML"
6382 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6383 [(set_attr "type" "neon_fp_mul_s")]
6384 )
6385
6386 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6387 [(set (match_operand:V2SF 0 "register_operand" "=w")
6388 (fma:V2SF
6389 (float_extend:V2SF
6390 (vec_select:V2HF
6391 (match_operand:V4HF 2 "register_operand" "w")
6392 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6393 (float_extend:V2SF
6394 (vec_duplicate:V2HF
6395 (vec_select:HF
6396 (match_operand:V4HF 3 "register_operand" "x")
6397 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6398 (match_operand:V2SF 1 "register_operand" "0")))]
6399 "TARGET_F16FML"
6400 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6401 [(set_attr "type" "neon_fp_mul_s")]
6402 )
6403
6404 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6405 [(set (match_operand:V2SF 0 "register_operand" "=w")
6406 (fma:V2SF
6407 (float_extend:V2SF
6408 (neg:V2HF
6409 (vec_select:V2HF
6410 (match_operand:V4HF 2 "register_operand" "w")
6411 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6412 (float_extend:V2SF
6413 (vec_duplicate:V2HF
6414 (vec_select:HF
6415 (match_operand:V4HF 3 "register_operand" "x")
6416 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6417 (match_operand:V2SF 1 "register_operand" "0")))]
6418 "TARGET_F16FML"
6419 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6420 [(set_attr "type" "neon_fp_mul_s")]
6421 )
6422
6423 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6424 [(set (match_operand:V4SF 0 "register_operand" "")
6425 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6426 (match_operand:V8HF 2 "register_operand" "")
6427 (match_operand:V8HF 3 "register_operand" "")
6428 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6429 VFMLA16_LOW))]
6430 "TARGET_F16FML"
6431 {
6432 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6433 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6434
6435 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6436 operands[1],
6437 operands[2],
6438 operands[3],
6439 p1, lane));
6440 DONE;
6441 })
6442
6443 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6444 [(set (match_operand:V4SF 0 "register_operand" "")
6445 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6446 (match_operand:V8HF 2 "register_operand" "")
6447 (match_operand:V8HF 3 "register_operand" "")
6448 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6449 VFMLA16_HIGH))]
6450 "TARGET_F16FML"
6451 {
6452 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6453 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6454
6455 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6456 operands[1],
6457 operands[2],
6458 operands[3],
6459 p1, lane));
6460 DONE;
6461 })
6462
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

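;; The V2SF variants below implement the 64-bit forms: the '_low' insns
;; widen the bottom two halfwords of a V4HF multiplicand and the '_high'
;; ones (fmlal2/fmlsl2) the top two, while the lane is still taken from a
;; full V8HF register (index 0-7).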
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

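;; The '_lane_' patterns differ from the '_laneq_' ones above only in the
;; lane operand: the index is taken from a 64-bit V4HF register, so the
;; immediate must fit in two bits (aarch64_imm2/"Ui2" instead of
;; aarch64_lane_imm3/"Ui7").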
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

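;; PMULL is a carry-less (polynomial, GF(2)) 64x64->128-bit multiply:
;; partial products are combined with XOR rather than addition, so e.g.
;; 0b11 * 0b11 = 0b101.  "pmull" multiplies the low doubleword element of
;; each source and "pmull2" the high one.  These are the patterns that the
;; vmull_p64/vmull_high_p64 intrinsics would be expected to map to; the
;; operation is the building block of carry-less-multiply kernels such as
;; GHASH and CRC.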
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)