;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2023 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The following define_subst rules are used to produce patterns representing
;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
;; a vec_concat with zeroes.  The order of the vec_concat operands differs
;; for big-endian so we have a separate define_subst rule for each endianness.
(define_subst "add_vec_concat_subst_le"
  [(set (match_operand:VDZ 0)
        (match_operand:VDZ 1))]
  "!BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_dup 1)
          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])

(define_subst "add_vec_concat_subst_be"
  [(set (match_operand:VDZ 0)
        (match_operand:VDZ 1))]
  "BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
          (match_dup 1)))])
;; The subst_attr definitions below are used to annotate patterns further on
;; in the file.  Patterns that need to have the above substitutions added to
;; them should have <vczle><vczbe> appended to their name.
(define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
(define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
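
;; For example (illustrative sketch, little-endian case), annotating
;; add<mode>3 with <vczle><vczbe> additionally generates a pattern named
;; add<mode>3_vec_concatz_le along the lines of:
;;
;;   (set (reg:V16QI v0)
;;        (vec_concat:V16QI (plus:V8QI (reg:V8QI v1) (reg:V8QI v2))
;;                          (const_vector:V8QI [0 0 0 0 0 0 0 0])))
;;
;; which lets combine fold away an explicit zeroing of the upper half.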

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_FLOAT"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
  "
)
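
;; A minimal C example of the xzr special case above (illustrative; the
;; exact code generated depends on tuning and alignment):
;;
;;   #include <arm_neon.h>
;;   void zero16 (int32x4_t *p) { *p = vdupq_n_s32 (0); }
;;
;; can be emitted as a pair store of xzr, "stp xzr, xzr, [x0]", rather than
;; materialising a zero vector register first.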

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_FLOAT && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vwcore>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w,r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vwcore>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
          "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
        (match_operand:VDMOV 1 "general_operand"
          "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
  "@
   ldr\t%d0, %1
   ldr\t%x0, %1
   str\txzr, %0
   str\t%d1, %0
   str\t%x1, %0
   * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
   * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
   fmov\t%d0, %1
   mov\t%0, %1
   * return aarch64_output_simd_mov_immediate (operands[1], 64);
   fmov\t%d0, xzr"
  [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
                     store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>, f_mcr")
   (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
)

(define_insn "*aarch64_simd_mov<VQMOV:mode>"
  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
          "=w, Umn, m, w, ?r, ?w, ?r, w, w")
        (match_operand:VQMOV 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn, Dz"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
  "@
   ldr\t%q0, %1
   stp\txzr, xzr, %0
   str\t%q1, %0
   mov\t%0.<Vbtype>, %1.<Vbtype>
   #
   #
   #
   * return aarch64_output_simd_mov_immediate (operands[1], 128);
   fmov\t%d0, xzr"
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>, fmov")
   (set_attr "length" "4,4,4,4,8,8,8,4,4")
   (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w,r")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w,r")
        (match_operand:DREG2 3 "memory_operand" "m,m"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "@
   ldp\t%d0, %d2, %z1
   ldp\t%x0, %x2, %z1"
  [(set_attr "type" "neon_ldp,load_16")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
        (match_operand:DREG 1 "register_operand" "w,r"))
   (set (match_operand:DREG2 2 "memory_operand" "=m,m")
        (match_operand:DREG2 3 "register_operand" "w,r"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "@
   stp\t%d1, %d3, %z0
   stp\t%x1, %x3, %z0"
  [(set_attr "type" "neon_stp,store_16")]
)

(define_insn "aarch64_simd_stp<mode>"
  [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand" "=Umn,Umn")
        (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand" "w,r")))]
  "TARGET_SIMD"
  "@
   stp\\t%<Vetype>1, %<Vetype>1, %y0
   stp\\t%<vw>1, %<vw>1, %y0"
  [(set_attr "type" "neon_stp, store_<ldpstp_vel_sz>")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
        (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
        (match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %z1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
        (match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_FLOAT
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %z0"
  [(set_attr "type" "neon_stp_q")]
)


(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
        (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_FLOAT
   && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
        (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_FLOAT
   && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQMOV 0)
        (match_operand:VQMOV 1))]
  "TARGET_FLOAT"
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GP_REGNUM_P (REGNO (src)))
    {
      rtx src_low_part = gen_lowpart (<VHALF>mode, src);
      rtx src_high_part = gen_highpart (<VHALF>mode, src);
      rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);

      emit_move_insn (dst_low_part, src_low_part);
      emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
                                             src_high_part));
    }
  else
    {
      rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
      rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
      rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
      rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
      emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
      emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
    }
  DONE;
}
)

(define_expand "aarch64_get_half<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand")
        (vec_select:<VHALF>
          (match_operand:VQMOV 1 "register_operand")
          (match_operand 2 "ascending_int_parallel")))]
  "TARGET_FLOAT"
{
  if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
    {
      emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
      DONE;
    }
}
)

(define_expand "aarch64_get_low<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_FLOAT"
{
  rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
  DONE;
}
)

(define_expand "aarch64_get_high<mode>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV 1 "register_operand")]
  "TARGET_FLOAT"
{
  rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
  DONE;
}
)

(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD"
  "@
   #
   umov\t%0, %1.d[0]"
  "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
  }
  [(set_attr "type" "mov_reg,neon_to_gp<q>")
   (set_attr "length" "4")]
)

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_FLOAT"
  "@
   dup\t%d0, %1.d[1]
   umov\t%0, %1.d[1]
   fmov\t%0, %1.d[1]"
  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
   (set_attr "arch" "simd,simd,*")
   (set_attr "length" "4")]
)

(define_insn "orn<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; Advanced SIMD does not support vector DImode MUL, but SVE does.
;; Make use of the overlap between Z and V registers to implement the V2DI
;; optab for TARGET_SVE.  The mulvnx2di3 expander can
;; handle the TARGET_SVE2 case transparently.
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand")
        (mult:V2DI (match_operand:V2DI 1 "register_operand")
                   (match_operand:V2DI 2 "aarch64_sve_vsm_operand")))]
  "TARGET_SVE"
{
  machine_mode sve_mode = VNx2DImode;
  rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], V2DImode, 0);
  rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], V2DImode, 0);
  rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], V2DImode, 0);

  emit_insn (gen_mulvnx2di3 (sve_op0, sve_op1, sve_op2));
  DONE;
}
)
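
;; For example (illustrative; assumes SVE is available), a fixed-width
;; two-lane multiply such as:
;;
;;   void f (long *restrict c, long *a, long *b)
;;   { c[0] = a[0] * b[0]; c[1] = a[1] * b[1]; }
;;
;; can be done on the low 128 bits of the Z registers with a single
;; predicated SVE multiply, "mul z0.d, p0/m, z0.d, z1.d", instead of
;; falling back to scalar code.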

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode><vczle><vczbe>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
  rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                           <MODE>mode, 0);
  emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
  DONE;
}
)
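
;; The sequence above computes ctz as clz of the bit-reversed input: the
;; element-wise byte swap (REV) followed by the byte-wise bit reversal
;; (RBIT) reverses all bits of each element.  A scalar C model
;; (illustrative; bitrev32 stands for a hypothetical full 32-bit
;; bit-reverse):
;;
;;   int ctz32 (uint32_t x) { return __builtin_clz (bitrev32 (x)); }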

(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
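
;; A scalar C model of the AND/EOR sequence above (illustrative sketch):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float xorsign (float x, float y)
;;   {
;;     uint32_t xi, yi;
;;     memcpy (&xi, &x, 4);
;;     memcpy (&yi, &y, 4);
;;     xi ^= yi & 0x80000000u;  /* AND with the sign-bit mask, then EOR.  */
;;     memcpy (&x, &xi, 4);
;;     return x;
;;   }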

;; The fcadd and fcmla patterns are kept as UNSPECs because their use has to
;; guarantee that the source vectors are contiguous.  It would be wrong to
;; describe the operation without being able to describe the permute that is
;; also required, but even if that is done the permute would have been
;; created as a LOAD_LANES, which means the values in the registers are in
;; the wrong order.
(define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_expand "cadd<rot><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                       (match_operand:VHSDF 2 "register_operand")]
                      FCADD))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
)

(define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")]
                                  FCMLA)
                    (match_operand:VHSDF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                                  FCMLA)
                    (match_operand:VHSDF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
        (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
                                 (match_operand:V8HF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                FCMLA)
                   (match_operand:V4HF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
        (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
                                     (match_operand:<VHALF> 3 "register_operand" "w")
                                     (match_operand:SI 4 "const_int_operand" "n")]
                                    FCMLA)
                     (match_operand:VQ_HSF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:VHSDF 0 "register_operand")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                                   (match_operand:VHSDF 2 "register_operand")]
                                  FCMLA_OP)
                    (match_operand:VHSDF 3 "register_operand")))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
                                                 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
                                                 operands[2], operands[1]));
  DONE;
})

;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                       (match_operand:VHSDF 2 "register_operand")]
                      FCMUL_OP))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx res1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
                                                 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
                                                 operands[2], operands[1]));
  DONE;
})

;; These expands map to the Dot Product optab the vectorizer checks for
;; and to the intrinsics pattern.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
                      (match_operand:<VSI2QI> 2 "register_operand" "w")]
                     DOTPROD)
          (match_operand:VS 3 "register_operand" "0")))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
;; (vector) Dot Product operation and the vectorized optab.
(define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
                      (match_operand:<VSI2QI> 2 "register_operand" "w")]
                     UNSPEC_USDOT)
          (match_operand:VS 3 "register_operand" "0")))]
  "TARGET_I8MM"
  "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                      (match_operand:V8QI 3 "register_operand" "<h_con>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     DOTPROD)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                      (match_operand:V16QI 3 "register_operand" "<h_con>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     DOTPROD)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
                      (match_operand:VB 3 "register_operand" "w")
                      (match_operand:SI 4 "immediate_operand" "i")]
                     DOTPROD_I8MM)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
{
  int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
  return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<VS:q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)
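
;; A scalar C model of the BSL-based selection above (illustrative sketch):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   double copysign_d (double x, double y)
;;   {
;;     uint64_t xi, yi, m = 1ULL << 63;
;;     memcpy (&xi, &x, 8);
;;     memcpy (&yi, &y, 8);
;;     xi = (yi & m) | (xi & ~m);  /* Bitwise select on the sign bit.  */
;;     memcpy (&x, &xi, 8);
;;     return x;
;;   }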

(define_insn "mul_lane<mode>3"
  [(set (match_operand:VMULD 0 "register_operand" "=w")
        (mult:VMULD
          (vec_duplicate:VMULD
            (vec_select:<VEL>
              (match_operand:<VCOND> 2 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
          (match_operand:VMULD 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
  return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "mul_laneq<mode>3"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 3 "immediate_operand")])))
          (match_operand:VMUL 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
  return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "mul_n<mode>3"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 2 "register_operand" "<h_con>"))
          (match_operand:VMUL 1 "register_operand" "w")))]
  "TARGET_SIMD"
811 "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "aarch64_ursqrte<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                       UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
  return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
}
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode><vczle><vczbe>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI
          (USMAX:VDQ_BHSI
            (match_operand:VDQ_BHSI 1 "register_operand" "w")
            (match_operand:VDQ_BHSI 2 "register_operand" "w"))
          (<max_opp>:VDQ_BHSI
            (match_dup 1)
            (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_expand "<su>abd<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (USMAX:VDQ_BHSI
     (match_operand:VDQ_BHSI 1 "register_operand")
     (match_operand:VDQ_BHSI 2 "register_operand"))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
  DONE;
}
)

(define_insn "aarch64_<su>abdl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (zero_extend:<VWIDE>
          (minus:VD_BHSI
            (USMAX:VD_BHSI
              (match_operand:VD_BHSI 1 "register_operand" "w")
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (<max_opp>:VD_BHSI
              (match_dup 1)
              (match_dup 2)))))]
  "TARGET_SIMD"
  "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<su>abdl2<mode>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (zero_extend:<VDBLW>
          (minus:<VHALF>
            (USMAX:<VHALF>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3)))
            (<max_opp>:<VHALF>
              (vec_select:<VHALF>
                (match_dup 1)
                (match_dup 3))
              (vec_select:<VHALF>
                (match_dup 2)
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_expand "aarch64_<su>abdl2<mode>"
  [(match_operand:<VDBLW> 0 "register_operand")
   (USMAX:VQW
     (match_operand:VQW 1 "register_operand")
     (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
                                               operands[2], hi));
  DONE;
}
)

(define_insn "aarch64_<su>abdl<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (abs:<VWIDE>
          (minus:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd_long")]
)

(define_insn "aarch64_<su>abdl<mode>_lo_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (abs:<VWIDE>
          (minus:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_abd_long")]
)

(define_expand "vec_widen_<su>abd_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
                                                     operands[2], p));
  DONE;
}
)

(define_expand "vec_widen_<su>abd_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
                                                     operands[2], p));
  DONE;
}
)

(define_insn "aarch64_<su>abal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (zero_extend:<VWIDE>
            (minus:VD_BHSI
              (USMAX:VD_BHSI
                (match_operand:VD_BHSI 2 "register_operand" "w")
                (match_operand:VD_BHSI 3 "register_operand" "w"))
              (<max_opp>:VD_BHSI
                (match_dup 2)
                (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<su>abal2<mode>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (plus:<VDBLW>
          (zero_extend:<VDBLW>
            (minus:<VHALF>
              (USMAX:<VHALF>
                (vec_select:<VHALF>
                  (match_operand:VQW 2 "register_operand" "w")
                  (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
                (vec_select:<VHALF>
                  (match_operand:VQW 3 "register_operand" "w")
                  (match_dup 4)))
              (<max_opp>:<VHALF>
                (vec_select:<VHALF>
                  (match_dup 2)
                  (match_dup 4))
                (vec_select:<VHALF>
                  (match_dup 3)
                  (match_dup 4)))))
          (match_operand:<VDBLW> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_expand "aarch64_<su>abal2<mode>"
  [(match_operand:<VDBLW> 0 "register_operand")
   (match_operand:<VDBLW> 1 "register_operand")
   (USMAX:VQW
     (match_operand:VQW 2 "register_operand")
     (match_operand:VQW 3 "register_operand"))]
  "TARGET_SIMD"
{
  rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
                                               operands[2], operands[3], hi));
  DONE;
}
)

(define_expand "aarch64_<su>adalp<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand")
        (plus:<VDBLW>
          (plus:<VDBLW>
            (vec_select:<VDBLW>
              (ANY_EXTEND:<V2XWIDE>
                (match_operand:VDQV_L 2 "register_operand"))
              (match_dup 3))
            (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
                                (match_dup 4)))
          (match_operand:<VDBLW> 1 "register_operand")))]
  "TARGET_SIMD"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
  operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
  operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
}
)

(define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (plus:<VDBLW>
          (plus:<VDBLW>
            (vec_select:<VDBLW>
              (ANY_EXTEND:<V2XWIDE>
                (match_operand:VDQV_L 2 "register_operand" "w"))
              (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
            (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
                                (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
          (match_operand:<VDBLW> 1 "register_operand" "0")))]
  "TARGET_SIMD
   && !rtx_equal_p (operands[3], operands[4])"
  "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.8b, op2.8b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD	tmp2.16b, op1.16b, op2.16b
;; UDOT	op3.4s, tmp2.16b, tmp1.16b
;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.
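;;
;; This implements the vectorizer's SAD_EXPR, i.e. (illustrative) loops like:
;;
;;   unsigned sad (const uint8_t *a, const uint8_t *b, int n)
;;   {
;;     unsigned sum = 0;
;;     for (int i = 0; i < n; i++)
;;       sum += abs (a[i] - b[i]);	/* Subtraction is done in int.  */
;;     return sum;
;;   }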

(define_expand "<su>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
                (match_operand:V16QI 2 "register_operand"))
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
{
  if (TARGET_DOTPROD)
    {
      rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
      rtx abd = gen_reg_rtx (V16QImode);
      emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
      emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
      DONE;
    }
  rtx reduc = gen_reg_rtx (V8HImode);
  emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
                                         operands[2]));
  emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
                                       gen_lowpart (V8QImode, operands[1]),
                                       gen_lowpart (V8QImode,
                                                    operands[2])));
  emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
  emit_move_insn (operands[0], operands[3]);
  DONE;
}
)

(define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (minus:VDQ_BHSI
                         (USMAX:VDQ_BHSI
                           (match_operand:VDQ_BHSI 2 "register_operand" "w")
                           (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                         (<max_opp>:VDQ_BHSI
                           (match_dup 2)
                           (match_dup 3)))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  "@
   and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
   * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
                                               AARCH64_CHECK_BIC);"
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  "@
   orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
   * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
                                               AARCH64_CHECK_ORR);"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
          (match_operand:VALL_F16 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  switch (which_alternative)
    {
    case 0:
      return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
    case 1:
      return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
    case 2:
      return "ld1\\t{%0.<Vetype>}[%p2], %1";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "aarch64_simd_vec_set_zero<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
          (match_operand:VALL_F16 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
}
)

(define_insn "@aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                         INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})
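
;; A scalar C model of the shift-based signbit above (illustrative sketch):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   int32_t signbit_f (float x)
;;   {
;;     uint32_t xi;
;;     memcpy (&xi, &x, 4);
;;     return xi >> 31;	/* USHR by (element bitsize - 1).  */
;;   }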

(define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))]
  "TARGET_SIMD"
  "@
   cmlt\t%0.<Vtype>, %1.<Vtype>, #0
   sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
)

(define_insn "aarch64_<sra_op>sra_n<mode>_insn"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 2 "register_operand" "w")
            (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
          (match_operand:VDQ_I 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (plus:VSDQ_I_DI
          (truncate:VSDQ_I_DI
            (SHIFTRT:<V2XWIDE>
              (plus:<V2XWIDE>
                (<SHIFTEXTEND>:<V2XWIDE>
                  (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
          (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_expand "aarch64_<sra_op>sra_n<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 2 "register_operand")
            (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
          (match_operand:VDQ_I 1 "register_operand")))]
  "TARGET_SIMD"
{
  operands[3]
    = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
}
)

(define_expand "aarch64_<sra_op>rsra_n<mode>"
  [(match_operand:VSDQ_I_DI 0 "register_operand")
   (match_operand:VSDQ_I_DI 1 "register_operand")
   (SHIFTRT:VSDQ_I_DI
     (match_operand:VSDQ_I_DI 2 "register_operand")
     (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
  "TARGET_SIMD"
{
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
  rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  if (VECTOR_MODE_P (<MODE>mode))
    {
      shft = gen_const_vec_duplicate (<MODE>mode, shft);
      rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
    }

  emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
                                                    operands[2], shft, rnd));
  DONE;
}
)
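
;; As a worked example (illustrative), an unsigned rounding shift right by 2
;; of the element value 7 computes (7 + (1 << 1)) >> 2 = 9 >> 2 = 2, whereas
;; a plain USHR would give 7 >> 2 = 1; adding 1 << (shift - 1) before the
;; shift is what implements round-to-nearest.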

(define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
                                                               operands[2],
                                                               0)));
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
  DONE;
})

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                         convert_to_mode (<VEL>mode, tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      tmp1));
  DONE;
})
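
;; Advanced SIMD has no right-shift-by-register instruction: USHL/SSHL shift
;; left for positive shift elements and right for negative ones.  The
;; variable-shift expanders above and below therefore negate the shift count
;; first, roughly (illustrative; ushl here models the USHL instruction's
;; behaviour for negative counts):
;;
;;   uint32_t lshr_var (uint32_t x, int n) { return ushl (x, -n); }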
1541
1542 (define_expand "ashr<mode>3"
1543 [(match_operand:VDQ_I 0 "register_operand")
1544 (match_operand:VDQ_I 1 "register_operand")
1545 (match_operand:SI 2 "general_operand")]
1546 "TARGET_SIMD"
1547 {
1548 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1549 int shift_amount;
1550
1551 if (CONST_INT_P (operands[2]))
1552 {
1553 shift_amount = INTVAL (operands[2]);
1554 if (shift_amount > 0 && shift_amount <= bit_width)
1555 {
1556 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1557 shift_amount);
1558 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1559 operands[1],
1560 tmp));
1561 DONE;
1562 }
1563 }
1564
1565 operands[2] = force_reg (SImode, operands[2]);
1566
1567 rtx tmp = gen_reg_rtx (SImode);
1568 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1569 emit_insn (gen_negsi2 (tmp, operands[2]));
1570 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1571 tmp, 0)));
1572 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1573 tmp1));
1574 DONE;
1575 })
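;; Advanced SIMD has no right-shift-by-register instruction, so the two
;; expanders above negate the shift amount and shift left by a negative
;; count instead. A minimal C sketch of the same trick for V4SI, assuming
;; <arm_neon.h>:
;;   uint32x4_t lshr_reg (uint32x4_t x, int n)
;;   { return vshlq_u32 (x, vdupq_n_s32 (-n)); } /* neg + ushl */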
1576
1577 (define_expand "vashl<mode>3"
1578 [(match_operand:VDQ_I 0 "register_operand")
1579 (match_operand:VDQ_I 1 "register_operand")
1580 (match_operand:VDQ_I 2 "register_operand")]
1581 "TARGET_SIMD"
1582 {
1583 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1584 operands[2]));
1585 DONE;
1586 })
1587
1588 (define_expand "vashr<mode>3"
1589 [(match_operand:VDQ_I 0 "register_operand")
1590 (match_operand:VDQ_I 1 "register_operand")
1591 (match_operand:VDQ_I 2 "register_operand")]
1592 "TARGET_SIMD"
1593 {
1594 rtx neg = gen_reg_rtx (<MODE>mode);
1595 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1596 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1597 neg));
1598 DONE;
1599 })
1600
1601 ;; DI vector shift
1602 (define_expand "aarch64_ashr_simddi"
1603 [(match_operand:DI 0 "register_operand")
1604 (match_operand:DI 1 "register_operand")
1605 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1606 "TARGET_SIMD"
1607 {
1608 /* An arithmetic shift right by 64 fills the result with copies of the sign
1609 bit, just like an asr by 63; however, the standard pattern does not
1610 handle a shift by 64. */
1611 if (INTVAL (operands[2]) == 64)
1612 operands[2] = GEN_INT (63);
1613 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1614 DONE;
1615 }
1616 )
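;; For example, for any int64_t x, an arithmetic shift right by 63 already
;; yields 0 for non-negative x and -1 for negative x, so clamping the
;; shift amount from 64 to 63 above does not change the result.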
1617
1618 (define_expand "vlshr<mode>3"
1619 [(match_operand:VDQ_I 0 "register_operand")
1620 (match_operand:VDQ_I 1 "register_operand")
1621 (match_operand:VDQ_I 2 "register_operand")]
1622 "TARGET_SIMD"
1623 {
1624 rtx neg = gen_reg_rtx (<MODE>mode);
1625 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1626 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1627 neg));
1628 DONE;
1629 })
1630
1631 (define_expand "aarch64_lshr_simddi"
1632 [(match_operand:DI 0 "register_operand")
1633 (match_operand:DI 1 "register_operand")
1634 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1635 "TARGET_SIMD"
1636 {
1637 if (INTVAL (operands[2]) == 64)
1638 emit_move_insn (operands[0], const0_rtx);
1639 else
1640 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1641 DONE;
1642 }
1643 )
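;; In C terms the semantics implemented here are, loosely,
;; "n == 64 ? 0 : x >> n" for a uint64_t x: a shift by 64 is folded to a
;; move of zero rather than being given to the standard pattern, which
;; does not handle it.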
1644
1645 ;; For 64-bit modes we use shl/ushr, as this does not require a SIMD zero.
1646 (define_insn "vec_shr_<mode><vczle><vczbe>"
1647 [(set (match_operand:VD 0 "register_operand" "=w")
1648 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1649 (match_operand:SI 2 "immediate_operand" "i")]
1650 UNSPEC_VEC_SHR))]
1651 "TARGET_SIMD"
1652 {
1653 if (BYTES_BIG_ENDIAN)
1654 return "shl %d0, %d1, %2";
1655 else
1656 return "ushr %d0, %d1, %2";
1657 }
1658 [(set_attr "type" "neon_shift_imm")]
1659 )
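;; On little-endian, element 0 lives in the least significant bits of the
;; D register, so a bitwise ushr of the whole register moves the
;; higher-numbered elements towards element 0 and zero-fills the top; on
;; big-endian the in-register lane order is reversed, so shl performs the
;; same element-wise move.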
1660
1661 (define_expand "vec_set<mode>"
1662 [(match_operand:VALL_F16 0 "register_operand")
1663 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1664 (match_operand:SI 2 "immediate_operand")]
1665 "TARGET_SIMD"
1666 {
1667 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1668 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1669 GEN_INT (elem), operands[0]));
1670 DONE;
1671 }
1672 )
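;; Note that the immediate built above is a one-hot merge mask rather
;; than a lane number: inserting into lane 2, say, passes (1 << 2) = 4,
;; and the remaining lanes keep the original contents of operand 0.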
1673
1674
1675 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1676 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1677 (plus:VDQ_BHSI (mult:VDQ_BHSI
1678 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1679 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1680 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1681 "TARGET_SIMD"
1682 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1683 [(set_attr "type" "neon_mla_<Vetype><q>")]
1684 )
1685
1686 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1687 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1688 (plus:VDQHS
1689 (mult:VDQHS
1690 (vec_duplicate:VDQHS
1691 (vec_select:<VEL>
1692 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1693 (parallel [(match_operand:SI 2 "immediate_operand")])))
1694 (match_operand:VDQHS 3 "register_operand" "w"))
1695 (match_operand:VDQHS 4 "register_operand" "0")))]
1696 "TARGET_SIMD"
1697 {
1698 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1699 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1700 }
1701 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1702 )
1703
1704 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1705 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1706 (plus:VDQHS
1707 (mult:VDQHS
1708 (vec_duplicate:VDQHS
1709 (vec_select:<VEL>
1710 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1711 (parallel [(match_operand:SI 2 "immediate_operand")])))
1712 (match_operand:VDQHS 3 "register_operand" "w"))
1713 (match_operand:VDQHS 4 "register_operand" "0")))]
1714 "TARGET_SIMD"
1715 {
1716 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1717 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1718 }
1719 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1720 )
1721
1722 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1723 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1724 (plus:VDQHS
1725 (mult:VDQHS
1726 (vec_duplicate:VDQHS
1727 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1728 (match_operand:VDQHS 2 "register_operand" "w"))
1729 (match_operand:VDQHS 1 "register_operand" "0")))]
1730 "TARGET_SIMD"
1731 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1732 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1733 )
1734
1735 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1736 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1737 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1738 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1739 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1740 "TARGET_SIMD"
1741 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1742 [(set_attr "type" "neon_mla_<Vetype><q>")]
1743 )
1744
1745 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1746 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1747 (minus:VDQHS
1748 (match_operand:VDQHS 4 "register_operand" "0")
1749 (mult:VDQHS
1750 (vec_duplicate:VDQHS
1751 (vec_select:<VEL>
1752 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1753 (parallel [(match_operand:SI 2 "immediate_operand")])))
1754 (match_operand:VDQHS 3 "register_operand" "w"))))]
1755 "TARGET_SIMD"
1756 {
1757 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1758 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1759 }
1760 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1761 )
1762
1763 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1764 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1765 (minus:VDQHS
1766 (match_operand:VDQHS 4 "register_operand" "0")
1767 (mult:VDQHS
1768 (vec_duplicate:VDQHS
1769 (vec_select:<VEL>
1770 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1771 (parallel [(match_operand:SI 2 "immediate_operand")])))
1772 (match_operand:VDQHS 3 "register_operand" "w"))))]
1773 "TARGET_SIMD"
1774 {
1775 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1776 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1777 }
1778 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1779 )
1780
1781 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1782 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1783 (minus:VDQHS
1784 (match_operand:VDQHS 1 "register_operand" "0")
1785 (mult:VDQHS
1786 (vec_duplicate:VDQHS
1787 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1788 (match_operand:VDQHS 2 "register_operand" "w"))))]
1789 "TARGET_SIMD"
1790 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1791 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1792 )
1793
1794 ;; Max/Min operations.
1795 (define_insn "<su><maxmin><mode>3<vczle><vczbe>"
1796 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1797 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1798 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1799 "TARGET_SIMD"
1800 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1801 [(set_attr "type" "neon_minmax<q>")]
1802 )
1803
1804 (define_expand "<su><maxmin>v2di3"
1805 [(set (match_operand:V2DI 0 "register_operand")
1806 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1807 (match_operand:V2DI 2 "register_operand")))]
1808 "TARGET_SIMD"
1809 {
1810 enum rtx_code cmp_operator;
1811 rtx cmp_fmt;
1812
1813 switch (<CODE>)
1814 {
1815 case UMIN:
1816 cmp_operator = LTU;
1817 break;
1818 case SMIN:
1819 cmp_operator = LT;
1820 break;
1821 case UMAX:
1822 cmp_operator = GTU;
1823 break;
1824 case SMAX:
1825 cmp_operator = GT;
1826 break;
1827 default:
1828 gcc_unreachable ();
1829 }
1830
1831 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1832 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1833 operands[2], cmp_fmt, operands[1], operands[2]));
1834 DONE;
1835 })
1836
1837 ;; Pairwise Integer Max/Min operations.
1838 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1839 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1840 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1841 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1842 MAXMINV))]
1843 "TARGET_SIMD"
1844 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1845 [(set_attr "type" "neon_minmax<q>")]
1846 )
1847
1848 ;; Pairwise FP Max/Min operations.
1849 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1850 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1852 (match_operand:VHSDF 2 "register_operand" "w")]
1853 FMAXMINV))]
1854 "TARGET_SIMD"
1855 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1856 [(set_attr "type" "neon_minmax<q>")]
1857 )
1858
1859 ;; vec_concat gives a new vector with the low elements from operand 1 and
1860 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1861 ;; and op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1862 ;; This means that the RTL descriptions of the patterns below need to
1863 ;; change depending on endianness.
1864
1865 ;; Narrowing operations.
1866
1867 (define_insn "aarch64_xtn2<mode>_insn_le"
1868 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1869 (vec_concat:<VNARROWQ2>
1870 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1871 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1872 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1873 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1874 [(set_attr "type" "neon_move_narrow_q")]
1875 )
1876
1877 (define_insn "aarch64_xtn2<mode>_insn_be"
1878 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1879 (vec_concat:<VNARROWQ2>
1880 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1881 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1882 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1883 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1884 [(set_attr "type" "neon_move_narrow_q")]
1885 )
1886
1887 (define_expand "aarch64_xtn2<mode>"
1888 [(match_operand:<VNARROWQ2> 0 "register_operand")
1889 (match_operand:<VNARROWQ> 1 "register_operand")
1890 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1891 "TARGET_SIMD"
1892 {
1893 if (BYTES_BIG_ENDIAN)
1894 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1895 operands[2]));
1896 else
1897 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1898 operands[2]));
1899 DONE;
1900 }
1901 )
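;; At the intrinsics level this corresponds to vmovn_high (a minimal
;; sketch, assuming <arm_neon.h> and V4SI -> V8HI):
;;   int16x8_t narrow_hi (int16x4_t lo, int32x4_t x)
;;   { return vmovn_high_s32 (lo, x); } /* xtn2 v0.8h, v1.4s */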
1902
1903 (define_insn "*aarch64_narrow_trunc<mode>"
1904 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1905 (vec_concat:<VNARROWQ2>
1906 (truncate:<VNARROWQ>
1907 (match_operand:VQN 1 "register_operand" "w"))
1908 (truncate:<VNARROWQ>
1909 (match_operand:VQN 2 "register_operand" "w"))))]
1910 "TARGET_SIMD"
1911 {
1912 if (!BYTES_BIG_ENDIAN)
1913 return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1914 else
1915 return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1916 }
1917 [(set_attr "type" "neon_permute<q>")]
1918 )
1919
1920 ;; Packing doubles.
1921
1922 (define_expand "vec_pack_trunc_<mode>"
1923 [(match_operand:<VNARROWD> 0 "register_operand")
1924 (match_operand:VDN 1 "general_operand")
1925 (match_operand:VDN 2 "general_operand")]
1926 "TARGET_SIMD"
1927 {
1928 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1929 emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
1930 emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1931 DONE;
1932 })
1933
1934 ;; Packing quads.
1935
1936 (define_expand "vec_pack_trunc_<mode>"
1937 [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1938 (vec_concat:<VNARROWQ2>
1939 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1940 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1941 "TARGET_SIMD"
1942 {
1943 rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1944 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1945 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1946
1947 emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1948
1949 if (BYTES_BIG_ENDIAN)
1950 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1951 operands[hi]));
1952 else
1953 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1954 operands[hi]));
1955 DONE;
1956 }
1957 )
1958
1959 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1960 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1961 (vec_concat:<VNARROWQ2>
1962 (truncate:<VNARROWQ>
1963 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1964 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1965 (truncate:<VNARROWQ>
1966 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1967 (match_dup 2)))))]
1968 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1969 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1970 [(set_attr "type" "neon_permute<q>")]
1971 )
1972
1973 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1974 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1975 (vec_concat:<VNARROWQ2>
1976 (truncate:<VNARROWQ>
1977 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1978 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1979 (truncate:<VNARROWQ>
1980 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1981 (match_dup 2)))))]
1982 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1983 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1984 [(set_attr "type" "neon_permute<q>")]
1985 )
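;; The two patterns above rely on the fact that truncating after a right
;; shift by exactly half the element width selects the top half of each
;; element, which is what uzp2 extracts when the inputs are reinterpreted
;; at the narrow width; e.g. for V4SI inputs, "(int16_t) (x >> 16)" per
;; element becomes a single uzp2 on the .8h views of the two registers.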
1986
1987 ;; Widening operations.
1988
1989 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1990 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1991 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1992 (match_operand:VQW 1 "register_operand" "w")
1993 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1994 )))]
1995 "TARGET_SIMD"
1996 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1997 [(set_attr "type" "neon_shift_imm_long")]
1998 )
1999
2000 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
2001 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2002 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2003 (match_operand:VQW 1 "register_operand" "w")
2004 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
2005 )))]
2006 "TARGET_SIMD"
2007 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
2008 [(set_attr "type" "neon_shift_imm_long")]
2009 )
2010
2011 (define_expand "vec_unpack<su>_hi_<mode>"
2012 [(match_operand:<VWIDE> 0 "register_operand")
2013 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2014 "TARGET_SIMD"
2015 {
2016 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2017 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
2018 operands[1], p));
2019 DONE;
2020 }
2021 )
2022
2023 (define_expand "vec_unpack<su>_lo_<mode>"
2024 [(match_operand:<VWIDE> 0 "register_operand")
2025 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2026 "TARGET_SIMD"
2027 {
2028 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2029 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
2030 operands[1], p));
2031 DONE;
2032 }
2033 )
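;; A minimal intrinsics-level sketch of the lo/hi unpacks, assuming
;; <arm_neon.h> and V8HI -> V4SI:
;;   int32x4_t unpack_lo (int16x8_t x)
;;   { return vmovl_s16 (vget_low_s16 (x)); } /* sxtl  */
;;   int32x4_t unpack_hi (int16x8_t x)
;;   { return vmovl_high_s16 (x); }           /* sxtl2 */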
2034
2035 ;; Widening arithmetic.
2036
2037 (define_insn "*aarch64_<su>mlal_lo<mode>"
2038 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2039 (plus:<VWIDE>
2040 (mult:<VWIDE>
2041 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2042 (match_operand:VQW 2 "register_operand" "w")
2043 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2044 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2045 (match_operand:VQW 4 "register_operand" "w")
2046 (match_dup 3))))
2047 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2048 "TARGET_SIMD"
2049 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2050 [(set_attr "type" "neon_mla_<Vetype>_long")]
2051 )
2052
2053 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2054 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2055 (plus:<VWIDE>
2056 (mult:<VWIDE>
2057 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2058 (match_operand:VQW 2 "register_operand" "w")
2059 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2060 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2061 (match_operand:VQW 4 "register_operand" "w")
2062 (match_dup 3))))
2063 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2064 "TARGET_SIMD"
2065 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2066 [(set_attr "type" "neon_mla_<Vetype>_long")]
2067 )
2068
2069 (define_expand "aarch64_<su>mlal_hi<mode>"
2070 [(match_operand:<VWIDE> 0 "register_operand")
2071 (match_operand:<VWIDE> 1 "register_operand")
2072 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
2073 (match_operand:VQW 3 "register_operand")]
2074 "TARGET_SIMD"
2075 {
2076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2077 emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2078 operands[2], p, operands[3]));
2079 DONE;
2080 }
2081 )
2082
2083 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2084 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2085 (plus:<VWIDE>
2086 (mult:<VWIDE>
2087 (ANY_EXTEND:<VWIDE>
2088 (vec_select:<VHALF>
2089 (match_operand:VQ_HSI 2 "register_operand" "w")
2090 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2091 (vec_duplicate:<VWIDE>
2092 (ANY_EXTEND:<VWIDE_S>
2093 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2094 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2095 "TARGET_SIMD"
2096 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2097 [(set_attr "type" "neon_mla_<Vetype>_long")]
2098 )
2099
2100 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2101 [(match_operand:<VWIDE> 0 "register_operand")
2102 (match_operand:<VWIDE> 1 "register_operand")
2103 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2104 (match_operand:<VEL> 3 "register_operand")]
2105 "TARGET_SIMD"
2106 {
2107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108 emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2109 operands[1], operands[2], p, operands[3]));
2110 DONE;
2111 }
2112 )
2113
2114 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2115 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2116 (minus:<VWIDE>
2117 (match_operand:<VWIDE> 1 "register_operand" "0")
2118 (mult:<VWIDE>
2119 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2120 (match_operand:VQW 2 "register_operand" "w")
2121 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2122 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2123 (match_operand:VQW 4 "register_operand" "w")
2124 (match_dup 3))))))]
2125 "TARGET_SIMD"
2126 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2127 [(set_attr "type" "neon_mla_<Vetype>_long")]
2128 )
2129
2130 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2131 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2132 (minus:<VWIDE>
2133 (match_operand:<VWIDE> 1 "register_operand" "0")
2134 (mult:<VWIDE>
2135 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2136 (match_operand:VQW 2 "register_operand" "w")
2137 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2138 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2139 (match_operand:VQW 4 "register_operand" "w")
2140 (match_dup 3))))))]
2141 "TARGET_SIMD"
2142 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2143 [(set_attr "type" "neon_mla_<Vetype>_long")]
2144 )
2145
2146 (define_expand "aarch64_<su>mlsl_hi<mode>"
2147 [(match_operand:<VWIDE> 0 "register_operand")
2148 (match_operand:<VWIDE> 1 "register_operand")
2149 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
2150 (match_operand:VQW 3 "register_operand")]
2151 "TARGET_SIMD"
2152 {
2153 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2154 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2155 operands[2], p, operands[3]));
2156 DONE;
2157 }
2158 )
2159
2160 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2161 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2162 (minus:<VWIDE>
2163 (match_operand:<VWIDE> 1 "register_operand" "0")
2164 (mult:<VWIDE>
2165 (ANY_EXTEND:<VWIDE>
2166 (vec_select:<VHALF>
2167 (match_operand:VQ_HSI 2 "register_operand" "w")
2168 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2169 (vec_duplicate:<VWIDE>
2170 (ANY_EXTEND:<VWIDE_S>
2171 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2172 "TARGET_SIMD"
2173 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2174 [(set_attr "type" "neon_mla_<Vetype>_long")]
2175 )
2176
2177 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2178 [(match_operand:<VWIDE> 0 "register_operand")
2179 (match_operand:<VWIDE> 1 "register_operand")
2180 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2181 (match_operand:<VEL> 3 "register_operand")]
2182 "TARGET_SIMD"
2183 {
2184 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2185 emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2186 operands[1], operands[2], p, operands[3]));
2187 DONE;
2188 }
2189 )
2190
2191 (define_insn "aarch64_<su>mlal<mode>"
2192 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2193 (plus:<VWIDE>
2194 (mult:<VWIDE>
2195 (ANY_EXTEND:<VWIDE>
2196 (match_operand:VD_BHSI 2 "register_operand" "w"))
2197 (ANY_EXTEND:<VWIDE>
2198 (match_operand:VD_BHSI 3 "register_operand" "w")))
2199 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2200 "TARGET_SIMD"
2201 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2202 [(set_attr "type" "neon_mla_<Vetype>_long")]
2203 )
2204
2205 (define_insn "aarch64_<su>mlal_n<mode>"
2206 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2207 (plus:<VWIDE>
2208 (mult:<VWIDE>
2209 (ANY_EXTEND:<VWIDE>
2210 (match_operand:VD_HSI 2 "register_operand" "w"))
2211 (vec_duplicate:<VWIDE>
2212 (ANY_EXTEND:<VWIDE_S>
2213 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2214 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2215 "TARGET_SIMD"
2216 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2217 [(set_attr "type" "neon_mla_<Vetype>_long")]
2218 )
2219
2220 (define_insn "aarch64_<su>mlsl<mode>"
2221 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2222 (minus:<VWIDE>
2223 (match_operand:<VWIDE> 1 "register_operand" "0")
2224 (mult:<VWIDE>
2225 (ANY_EXTEND:<VWIDE>
2226 (match_operand:VD_BHSI 2 "register_operand" "w"))
2227 (ANY_EXTEND:<VWIDE>
2228 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2229 "TARGET_SIMD"
2230 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2231 [(set_attr "type" "neon_mla_<Vetype>_long")]
2232 )
2233
2234 (define_insn "aarch64_<su>mlsl_n<mode>"
2235 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2236 (minus:<VWIDE>
2237 (match_operand:<VWIDE> 1 "register_operand" "0")
2238 (mult:<VWIDE>
2239 (ANY_EXTEND:<VWIDE>
2240 (match_operand:VD_HSI 2 "register_operand" "w"))
2241 (vec_duplicate:<VWIDE>
2242 (ANY_EXTEND:<VWIDE_S>
2243 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2244 "TARGET_SIMD"
2245 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2246 [(set_attr "type" "neon_mla_<Vetype>_long")]
2247 )
2248
2249 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2250 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2251 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2252 (match_operand:VQW 1 "register_operand" "w")
2253 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2254 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2255 (match_operand:VQW 2 "register_operand" "w")
2256 (match_dup 3)))))]
2257 "TARGET_SIMD"
2258 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2259 [(set_attr "type" "neon_mul_<Vetype>_long")]
2260 )
2261
2262 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2264 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2265 (match_operand:VD_BHSI 1 "register_operand" "w"))
2266 (ANY_EXTEND:<VWIDE>
2267 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2268 "TARGET_SIMD"
2269 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2270 [(set_attr "type" "neon_mul_<Vetype>_long")]
2271 )
2272
2273 (define_expand "vec_widen_<su>mult_lo_<mode>"
2274 [(match_operand:<VWIDE> 0 "register_operand")
2275 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2276 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2277 "TARGET_SIMD"
2278 {
2279 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2280 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2281 operands[1],
2282 operands[2], p));
2283 DONE;
2284 }
2285 )
2286
2287 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2289 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2290 (match_operand:VQW 1 "register_operand" "w")
2291 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2292 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2293 (match_operand:VQW 2 "register_operand" "w")
2294 (match_dup 3)))))]
2295 "TARGET_SIMD"
2296 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2297 [(set_attr "type" "neon_mul_<Vetype>_long")]
2298 )
2299
2300 (define_expand "vec_widen_<su>mult_hi_<mode>"
2301 [(match_operand:<VWIDE> 0 "register_operand")
2302 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2303 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2304 "TARGET_SIMD"
2305 {
2306 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2307 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2308 operands[1],
2309 operands[2], p));
2310 DONE;
2312 }
2313 )
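;; A minimal intrinsics-level sketch of the widening multiplies above,
;; assuming <arm_neon.h> and V8HI inputs:
;;   int32x4_t wmul_lo (int16x8_t a, int16x8_t b)
;;   { return vmull_s16 (vget_low_s16 (a), vget_low_s16 (b)); } /* smull  */
;;   int32x4_t wmul_hi (int16x8_t a, int16x8_t b)
;;   { return vmull_high_s16 (a, b); }                          /* smull2 */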
2314
2315 ;; vmull_lane_s16 intrinsics
2316 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2317 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2318 (mult:<VWIDE>
2319 (ANY_EXTEND:<VWIDE>
2320 (match_operand:<VCOND> 1 "register_operand" "w"))
2321 (vec_duplicate:<VWIDE>
2322 (ANY_EXTEND:<VWIDE_S>
2323 (vec_select:<VEL>
2324 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2325 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2326 "TARGET_SIMD"
2327 {
2328 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2329 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2330 }
2331 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2332 )
2333
2334 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2335 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2336 (mult:<VWIDE>
2337 (ANY_EXTEND:<VWIDE>
2338 (vec_select:<VHALF>
2339 (match_operand:VQ_HSI 1 "register_operand" "w")
2340 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2341 (vec_duplicate:<VWIDE>
2342 (ANY_EXTEND:<VWIDE_S>
2343 (vec_select:<VEL>
2344 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2345 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2346 "TARGET_SIMD"
2347 {
2348 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2349 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2350 }
2351 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2352 )
2353
2354 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2355 [(match_operand:<VWIDE> 0 "register_operand")
2356 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2357 (match_operand:<VCOND> 2 "register_operand")
2358 (match_operand:SI 3 "immediate_operand")]
2359 "TARGET_SIMD"
2360 {
2361 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2362 emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2363 operands[1], p, operands[2], operands[3]));
2364 DONE;
2365 }
2366 )
2367
2368 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2369 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2370 (mult:<VWIDE>
2371 (ANY_EXTEND:<VWIDE>
2372 (vec_select:<VHALF>
2373 (match_operand:VQ_HSI 1 "register_operand" "w")
2374 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2375 (vec_duplicate:<VWIDE>
2376 (ANY_EXTEND:<VWIDE_S>
2377 (vec_select:<VEL>
2378 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2379 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2380 "TARGET_SIMD"
2381 {
2382 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2383 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2384 }
2385 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2386 )
2387
2388 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2389 [(match_operand:<VWIDE> 0 "register_operand")
2390 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2391 (match_operand:<VCONQ> 2 "register_operand")
2392 (match_operand:SI 3 "immediate_operand")]
2393 "TARGET_SIMD"
2394 {
2395 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2396 emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2397 operands[1], p, operands[2], operands[3]));
2398 DONE;
2399 }
2400 )
2401
2402 (define_insn "aarch64_<su>mull_n<mode>"
2403 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2404 (mult:<VWIDE>
2405 (ANY_EXTEND:<VWIDE>
2406 (match_operand:VD_HSI 1 "register_operand" "w"))
2407 (vec_duplicate:<VWIDE>
2408 (ANY_EXTEND:<VWIDE_S>
2409 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2410 "TARGET_SIMD"
2411 "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2412 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2413 )
2414
2415 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2416 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2417 (mult:<VWIDE>
2418 (ANY_EXTEND:<VWIDE>
2419 (vec_select:<VHALF>
2420 (match_operand:VQ_HSI 1 "register_operand" "w")
2421 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2422 (vec_duplicate:<VWIDE>
2423 (ANY_EXTEND:<VWIDE_S>
2424 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2425 "TARGET_SIMD"
2426 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2427 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2428 )
2429
2430 (define_expand "aarch64_<su>mull_hi_n<mode>"
2431 [(match_operand:<VWIDE> 0 "register_operand")
2432 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2433 (match_operand:<VEL> 2 "register_operand")]
2434 "TARGET_SIMD"
2435 {
2436 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2437 emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2438 operands[2], p));
2439 DONE;
2440 }
2441 )
2442
2443 ;; vmlal_lane_s16 intrinsics
2444 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2445 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2446 (plus:<VWIDE>
2447 (mult:<VWIDE>
2448 (ANY_EXTEND:<VWIDE>
2449 (match_operand:<VCOND> 2 "register_operand" "w"))
2450 (vec_duplicate:<VWIDE>
2451 (ANY_EXTEND:<VWIDE_S>
2452 (vec_select:<VEL>
2453 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2454 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2455 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2456 "TARGET_SIMD"
2457 {
2458 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2459 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2460 }
2461 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2462 )
2463
2464 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2465 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2466 (plus:<VWIDE>
2467 (mult:<VWIDE>
2468 (ANY_EXTEND:<VWIDE>
2469 (vec_select:<VHALF>
2470 (match_operand:VQ_HSI 2 "register_operand" "w")
2471 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2472 (vec_duplicate:<VWIDE>
2473 (ANY_EXTEND:<VWIDE_S>
2474 (vec_select:<VEL>
2475 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2476 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2477 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2478 "TARGET_SIMD"
2479 {
2480 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2481 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2482 }
2483 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2484 )
2485
2486 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2487 [(match_operand:<VWIDE> 0 "register_operand")
2488 (match_operand:<VWIDE> 1 "register_operand")
2489 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2490 (match_operand:<VCOND> 3 "register_operand")
2491 (match_operand:SI 4 "immediate_operand")]
2492 "TARGET_SIMD"
2493 {
2494 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2495 emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2496 operands[1], operands[2], p, operands[3], operands[4]));
2497 DONE;
2498 }
2499 )
2500
2501 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2502 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2503 (plus:<VWIDE>
2504 (mult:<VWIDE>
2505 (ANY_EXTEND:<VWIDE>
2506 (vec_select:<VHALF>
2507 (match_operand:VQ_HSI 2 "register_operand" "w")
2508 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2509 (vec_duplicate:<VWIDE>
2510 (ANY_EXTEND:<VWIDE_S>
2511 (vec_select:<VEL>
2512 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2513 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2514 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2515 "TARGET_SIMD"
2516 {
2517 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2518 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2519 }
2520 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2521 )
2522
2523 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2524 [(match_operand:<VWIDE> 0 "register_operand")
2525 (match_operand:<VWIDE> 1 "register_operand")
2526 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2527 (match_operand:<VCONQ> 3 "register_operand")
2528 (match_operand:SI 4 "immediate_operand")]
2529 "TARGET_SIMD"
2530 {
2531 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2532 emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2533 operands[1], operands[2], p, operands[3], operands[4]));
2534 DONE;
2535 }
2536 )
2537
2538 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2539 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2540 (minus:<VWIDE>
2541 (match_operand:<VWIDE> 1 "register_operand" "0")
2542 (mult:<VWIDE>
2543 (ANY_EXTEND:<VWIDE>
2544 (match_operand:<VCOND> 2 "register_operand" "w"))
2545 (vec_duplicate:<VWIDE>
2546 (ANY_EXTEND:<VWIDE_S>
2547 (vec_select:<VEL>
2548 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2549 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2550 "TARGET_SIMD"
2551 {
2552 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2553 return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2554 }
2555 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2556 )
2557
2558 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2559 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2560 (minus:<VWIDE>
2561 (match_operand:<VWIDE> 1 "register_operand" "0")
2562 (mult:<VWIDE>
2563 (ANY_EXTEND:<VWIDE>
2564 (vec_select:<VHALF>
2565 (match_operand:VQ_HSI 2 "register_operand" "w")
2566 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2567 (vec_duplicate:<VWIDE>
2568 (ANY_EXTEND:<VWIDE_S>
2569 (vec_select:<VEL>
2570 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2571 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2572 )))]
2573 "TARGET_SIMD"
2574 {
2575 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2576 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2577 }
2578 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2579 )
2580
2581 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2582 [(match_operand:<VWIDE> 0 "register_operand")
2583 (match_operand:<VWIDE> 1 "register_operand")
2584 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2585 (match_operand:<VCOND> 3 "register_operand")
2586 (match_operand:SI 4 "immediate_operand")]
2587 "TARGET_SIMD"
2588 {
2589 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2590 emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2591 operands[1], operands[2], p, operands[3], operands[4]));
2592 DONE;
2593 }
2594 )
2595
2596 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2598 (minus:<VWIDE>
2599 (match_operand:<VWIDE> 1 "register_operand" "0")
2600 (mult:<VWIDE>
2601 (ANY_EXTEND:<VWIDE>
2602 (vec_select:<VHALF>
2603 (match_operand:VQ_HSI 2 "register_operand" "w")
2604 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2605 (vec_duplicate:<VWIDE>
2606 (ANY_EXTEND:<VWIDE_S>
2607 (vec_select:<VEL>
2608 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2609 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2610 )))]
2611 "TARGET_SIMD"
2612 {
2613 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2614 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2615 }
2616 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2617 )
2618
2619 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2620 [(match_operand:<VWIDE> 0 "register_operand")
2621 (match_operand:<VWIDE> 1 "register_operand")
2622 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2623 (match_operand:<VCONQ> 3 "register_operand")
2624 (match_operand:SI 4 "immediate_operand")]
2625 "TARGET_SIMD"
2626 {
2627 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2628 emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2629 operands[1], operands[2], p, operands[3], operands[4]));
2630 DONE;
2631 }
2632 )
2633
2634 ;; FP vector operations.
2635 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
2636 ;; double-precision (64-bit) floating-point data types and arithmetic as
2637 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
2638 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2639 ;;
2640 ;; Floating-point operations can raise an exception. Vectorising such
2641 ;; operations is safe for the reasons explained below.
2642 ;;
2643 ;; ARMv8 permits an extension to enable trapped floating-point
2644 ;; exception handling; however, this is an optional feature. In the
2645 ;; event of a floating-point exception being raised by vectorised
2646 ;; code then:
2647 ;; 1. If trapped floating-point exceptions are available, then a trap
2648 ;; will be taken when any lane raises an enabled exception. A trap
2649 ;; handler may determine which lane raised the exception.
2650 ;; 2. Otherwise, a sticky exception flag is set in the floating-point
2651 ;; status register (FPSR). If software explicitly tests the exception
2652 ;; flags inside a vectorisable region, those tests prevent
2653 ;; vectorisation and allow precise identification of the failing
2654 ;; operation; if the flags are only tested outside vectorisable
2655 ;; regions, the specific operation and lane are not of interest.
2656
2657 ;; FP arithmetic operations.
2658
2659 (define_insn "add<mode>3<vczle><vczbe>"
2660 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2661 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2662 (match_operand:VHSDF 2 "register_operand" "w")))]
2663 "TARGET_SIMD"
2664 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2665 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2666 )
2667
2668 (define_insn "sub<mode>3<vczle><vczbe>"
2669 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2670 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2671 (match_operand:VHSDF 2 "register_operand" "w")))]
2672 "TARGET_SIMD"
2673 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2674 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2675 )
2676
2677 (define_insn "mul<mode>3<vczle><vczbe>"
2678 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2679 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2680 (match_operand:VHSDF 2 "register_operand" "w")))]
2681 "TARGET_SIMD"
2682 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2683 [(set_attr "type" "neon_fp_mul_<stype><q>")]
2684 )
2685
2686 (define_expand "div<mode>3"
2687 [(set (match_operand:VHSDF 0 "register_operand")
2688 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2689 (match_operand:VHSDF 2 "register_operand")))]
2690 "TARGET_SIMD"
2691 {
2692 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2693 DONE;
2694
2695 operands[1] = force_reg (<MODE>mode, operands[1]);
2696 })
2697
2698 (define_insn "*div<mode>3<vczle><vczbe>"
2699 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2700 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2701 (match_operand:VHSDF 2 "register_operand" "w")))]
2702 "TARGET_SIMD"
2703 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2704 [(set_attr "type" "neon_fp_div_<stype><q>")]
2705 )
2706
2707 ;; SVE has vector integer division, unlike Advanced SIMD.
2708 ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
2709 ;; optabs to the midend.
2710 (define_expand "<su_optab>div<mode>3"
2711 [(set (match_operand:VQDIV 0 "register_operand")
2712 (ANY_DIV:VQDIV
2713 (match_operand:VQDIV 1 "register_operand")
2714 (match_operand:VQDIV 2 "register_operand")))]
2715 "TARGET_SVE"
2716 {
2717 machine_mode sve_mode
2718 = aarch64_full_sve_mode (GET_MODE_INNER (<MODE>mode)).require ();
2719 rtx sve_op0 = simplify_gen_subreg (sve_mode, operands[0], <MODE>mode, 0);
2720 rtx sve_op1 = simplify_gen_subreg (sve_mode, operands[1], <MODE>mode, 0);
2721 rtx sve_op2 = simplify_gen_subreg (sve_mode, operands[2], <MODE>mode, 0);
2722
2723 emit_insn (gen_<su_optab>div<vnx>3 (sve_op0, sve_op1, sve_op2));
2724 DONE;
2725 }
2726 )
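;; A hedged sketch of the effect, using GNU C vector types:
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   v4si div4 (v4si a, v4si b) { return a / b; }
;; With SVE available this becomes a predicated divide on the SVE views of
;; the 128-bit registers (e.g. "ptrue p0.s" followed by
;; "sdiv z0.s, p0/m, z0.s, z1.s") instead of four scalar divisions.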
2727
2728 (define_insn "neg<mode>2<vczle><vczbe>"
2729 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2730 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2731 "TARGET_SIMD"
2732 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2733 [(set_attr "type" "neon_fp_neg_<stype><q>")]
2734 )
2735
2736 (define_insn "abs<mode>2<vczle><vczbe>"
2737 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2738 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2739 "TARGET_SIMD"
2740 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2741 [(set_attr "type" "neon_fp_abs_<stype><q>")]
2742 )
2743
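;; The aarch64_float_mla* and aarch64_float_mls* expanders below
;; deliberately emit a separate multiply followed by an add or subtract
;; rather than a fused fma, so the intermediate product is rounded; this
;; matches the semantics of the non-fused vmla/vmls-style intrinsics they
;; implement.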
2744 (define_expand "aarch64_float_mla<mode>"
2745 [(set (match_operand:VDQF_DF 0 "register_operand")
2746 (plus:VDQF_DF
2747 (mult:VDQF_DF
2748 (match_operand:VDQF_DF 2 "register_operand")
2749 (match_operand:VDQF_DF 3 "register_operand"))
2750 (match_operand:VDQF_DF 1 "register_operand")))]
2751 "TARGET_SIMD"
2752 {
2753 rtx scratch = gen_reg_rtx (<MODE>mode);
2754 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2755 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2756 DONE;
2757 }
2758 )
2759
2760 (define_expand "aarch64_float_mls<mode>"
2761 [(set (match_operand:VDQF_DF 0 "register_operand")
2762 (minus:VDQF_DF
2763 (match_operand:VDQF_DF 1 "register_operand")
2764 (mult:VDQF_DF
2765 (match_operand:VDQF_DF 2 "register_operand")
2766 (match_operand:VDQF_DF 3 "register_operand"))))]
2767 "TARGET_SIMD"
2768 {
2769 rtx scratch = gen_reg_rtx (<MODE>mode);
2770 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2771 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2772 DONE;
2773 }
2774 )
2775
2776 (define_expand "aarch64_float_mla_n<mode>"
2777 [(set (match_operand:VDQSF 0 "register_operand")
2778 (plus:VDQSF
2779 (mult:VDQSF
2780 (vec_duplicate:VDQSF
2781 (match_operand:<VEL> 3 "register_operand"))
2782 (match_operand:VDQSF 2 "register_operand"))
2783 (match_operand:VDQSF 1 "register_operand")))]
2784 "TARGET_SIMD"
2785 {
2786 rtx scratch = gen_reg_rtx (<MODE>mode);
2787 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2788 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2789 DONE;
2790 }
2791 )
2792
2793 (define_expand "aarch64_float_mls_n<mode>"
2794 [(set (match_operand:VDQSF 0 "register_operand")
2795 (minus:VDQSF
2796 (match_operand:VDQSF 1 "register_operand")
2797 (mult:VDQSF
2798 (vec_duplicate:VDQSF
2799 (match_operand:<VEL> 3 "register_operand"))
2800 (match_operand:VDQSF 2 "register_operand"))))]
2801 "TARGET_SIMD"
2802 {
2803 rtx scratch = gen_reg_rtx (<MODE>mode);
2804 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2805 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2806 DONE;
2807 }
2808 )
2809
2810 (define_expand "aarch64_float_mla_lane<mode>"
2811 [(set (match_operand:VDQSF 0 "register_operand")
2812 (plus:VDQSF
2813 (mult:VDQSF
2814 (vec_duplicate:VDQSF
2815 (vec_select:<VEL>
2816 (match_operand:V2SF 3 "register_operand")
2817 (parallel [(match_operand:SI 4 "immediate_operand")])))
2818 (match_operand:VDQSF 2 "register_operand"))
2819 (match_operand:VDQSF 1 "register_operand")))]
2820 "TARGET_SIMD"
2821 {
2822 rtx scratch = gen_reg_rtx (<MODE>mode);
2823 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2824 operands[3], operands[4]));
2825 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2826 DONE;
2827 }
2828 )
2829
2830 (define_expand "aarch64_float_mls_lane<mode>"
2831 [(set (match_operand:VDQSF 0 "register_operand")
2832 (minus:VDQSF
2833 (match_operand:VDQSF 1 "register_operand")
2834 (mult:VDQSF
2835 (vec_duplicate:VDQSF
2836 (vec_select:<VEL>
2837 (match_operand:V2SF 3 "register_operand")
2838 (parallel [(match_operand:SI 4 "immediate_operand")])))
2839 (match_operand:VDQSF 2 "register_operand"))))]
2840 "TARGET_SIMD"
2841 {
2842 rtx scratch = gen_reg_rtx (<MODE>mode);
2843 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2844 operands[3], operands[4]));
2845 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2846 DONE;
2847 }
2848 )
2849
2850 (define_expand "aarch64_float_mla_laneq<mode>"
2851 [(set (match_operand:VDQSF 0 "register_operand")
2852 (plus:VDQSF
2853 (mult:VDQSF
2854 (vec_duplicate:VDQSF
2855 (vec_select:<VEL>
2856 (match_operand:V4SF 3 "register_operand")
2857 (parallel [(match_operand:SI 4 "immediate_operand")])))
2858 (match_operand:VDQSF 2 "register_operand"))
2859 (match_operand:VDQSF 1 "register_operand")))]
2860 "TARGET_SIMD"
2861 {
2862 rtx scratch = gen_reg_rtx (<MODE>mode);
2863 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2864 operands[3], operands[4]));
2865 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2866 DONE;
2867 }
2868 )
2869
2870 (define_expand "aarch64_float_mls_laneq<mode>"
2871 [(set (match_operand:VDQSF 0 "register_operand")
2872 (minus:VDQSF
2873 (match_operand:VDQSF 1 "register_operand")
2874 (mult:VDQSF
2875 (vec_duplicate:VDQSF
2876 (vec_select:<VEL>
2877 (match_operand:V4SF 3 "register_operand")
2878 (parallel [(match_operand:SI 4 "immediate_operand")])))
2879 (match_operand:VDQSF 2 "register_operand"))))]
2880 "TARGET_SIMD"
2881 {
2882 rtx scratch = gen_reg_rtx (<MODE>mode);
2883 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2884 operands[3], operands[4]));
2885 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2886 DONE;
2887 }
2888 )
2889
2890 (define_insn "fma<mode>4<vczle><vczbe>"
2891 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2892 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2893 (match_operand:VHSDF 2 "register_operand" "w")
2894 (match_operand:VHSDF 3 "register_operand" "0")))]
2895 "TARGET_SIMD"
2896 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2897 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2898 )
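;; Note the operand order: operand 3 is the accumulator and is tied to
;; the output register. A minimal sketch, assuming <arm_neon.h>:
;;   float32x4_t fused (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   { return vfmaq_f32 (acc, a, b); } /* fmla v0.4s, v1.4s, v2.4s */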
2899
2900 (define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
2901 [(set (match_operand:VDQF 0 "register_operand" "=w")
2902 (fma:VDQF
2903 (vec_duplicate:VDQF
2904 (vec_select:<VEL>
2905 (match_operand:VDQF 1 "register_operand" "<h_con>")
2906 (parallel [(match_operand:SI 2 "immediate_operand")])))
2907 (match_operand:VDQF 3 "register_operand" "w")
2908 (match_operand:VDQF 4 "register_operand" "0")))]
2909 "TARGET_SIMD"
2910 {
2911 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2912 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2913 }
2914 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2915 )
2916
2917 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2918 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2919 (fma:VDQSF
2920 (vec_duplicate:VDQSF
2921 (vec_select:<VEL>
2922 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2923 (parallel [(match_operand:SI 2 "immediate_operand")])))
2924 (match_operand:VDQSF 3 "register_operand" "w")
2925 (match_operand:VDQSF 4 "register_operand" "0")))]
2926 "TARGET_SIMD"
2927 {
2928 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2929 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2930 }
2931 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2932 )
2933
2934 (define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
2935 [(set (match_operand:VMUL 0 "register_operand" "=w")
2936 (fma:VMUL
2937 (vec_duplicate:VMUL
2938 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2939 (match_operand:VMUL 2 "register_operand" "w")
2940 (match_operand:VMUL 3 "register_operand" "0")))]
2941 "TARGET_SIMD"
2942 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2943 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2944 )
2945
2946 (define_insn "*aarch64_fma4_elt_to_64v2df"
2947 [(set (match_operand:DF 0 "register_operand" "=w")
2948 (fma:DF
2949 (vec_select:DF
2950 (match_operand:V2DF 1 "register_operand" "w")
2951 (parallel [(match_operand:SI 2 "immediate_operand")]))
2952 (match_operand:DF 3 "register_operand" "w")
2953 (match_operand:DF 4 "register_operand" "0")))]
2954 "TARGET_SIMD"
2955 {
2956 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2957 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2958 }
2959 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2960 )
2961
2962 (define_insn "fnma<mode>4<vczle><vczbe>"
2963 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2964 (fma:VHSDF
2965 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2966 (match_operand:VHSDF 2 "register_operand" "w")
2967 (match_operand:VHSDF 3 "register_operand" "0")))]
2968 "TARGET_SIMD"
2969 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2970 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2971 )
2972
2973 (define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
2974 [(set (match_operand:VDQF 0 "register_operand" "=w")
2975 (fma:VDQF
2976 (neg:VDQF
2977 (match_operand:VDQF 3 "register_operand" "w"))
2978 (vec_duplicate:VDQF
2979 (vec_select:<VEL>
2980 (match_operand:VDQF 1 "register_operand" "<h_con>")
2981 (parallel [(match_operand:SI 2 "immediate_operand")])))
2982 (match_operand:VDQF 4 "register_operand" "0")))]
2983 "TARGET_SIMD"
2984 {
2985 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2986 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2987 }
2988 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2989 )
2990
2991 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2992 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2993 (fma:VDQSF
2994 (neg:VDQSF
2995 (match_operand:VDQSF 3 "register_operand" "w"))
2996 (vec_duplicate:VDQSF
2997 (vec_select:<VEL>
2998 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2999 (parallel [(match_operand:SI 2 "immediate_operand")])))
3000 (match_operand:VDQSF 4 "register_operand" "0")))]
3001 "TARGET_SIMD"
3002 {
3003 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3004 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3005 }
3006 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3007 )
3008
3009 (define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
3010 [(set (match_operand:VMUL 0 "register_operand" "=w")
3011 (fma:VMUL
3012 (neg:VMUL
3013 (match_operand:VMUL 2 "register_operand" "w"))
3014 (vec_duplicate:VMUL
3015 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3016 (match_operand:VMUL 3 "register_operand" "0")))]
3017 "TARGET_SIMD"
3018 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3019 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
3020 )
3021
3022 (define_insn "*aarch64_fnma4_elt_to_64v2df"
3023 [(set (match_operand:DF 0 "register_operand" "=w")
3024 (fma:DF
3025 (vec_select:DF
3026 (match_operand:V2DF 1 "register_operand" "w")
3027 (parallel [(match_operand:SI 2 "immediate_operand")]))
3028 (neg:DF
3029 (match_operand:DF 3 "register_operand" "w"))
3030 (match_operand:DF 4 "register_operand" "0")))]
3031 "TARGET_SIMD"
3032 {
3033 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3034 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
3035 }
3036 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
3037 )
3038
3039 ;; Vector versions of the floating-point frint patterns.
3040 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
3041 (define_insn "<frint_pattern><mode>2<vczle><vczbe>"
3042 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3043 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3044 FRINT))]
3045 "TARGET_SIMD"
3046 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3047 [(set_attr "type" "neon_fp_round_<stype><q>")]
3048 )
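;; The mapping from standard pattern name to instruction is: btrunc ->
;; frintz, ceil -> frintp, floor -> frintm, nearbyint -> frinti,
;; rint -> frintx, round -> frinta and frintn -> frintn.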
3049
3050 ;; Vector versions of the fcvt standard patterns.
3051 ;; Expands to lbtrunc, lround, lceil, lfloor
3052 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3053 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3054 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3055 [(match_operand:VHSDF 1 "register_operand" "w")]
3056 FCVT)))]
3057 "TARGET_SIMD"
3058 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3059 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3060 )
3061
3062 ;; HF Scalar variants of related SIMD instructions.
3063 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3064 [(set (match_operand:HI 0 "register_operand" "=w")
3065 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3066 FCVT)))]
3067 "TARGET_SIMD_F16INST"
3068 "fcvt<frint_suffix><su>\t%h0, %h1"
3069 [(set_attr "type" "neon_fp_to_int_s")]
3070 )
3071
3072 (define_insn "<optab>_trunchfhi2"
3073 [(set (match_operand:HI 0 "register_operand" "=w")
3074 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3075 "TARGET_SIMD_F16INST"
3076 "fcvtz<su>\t%h0, %h1"
3077 [(set_attr "type" "neon_fp_to_int_s")]
3078 )
3079
3080 (define_insn "<optab>hihf2"
3081 [(set (match_operand:HF 0 "register_operand" "=w")
3082 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3083 "TARGET_SIMD_F16INST"
3084 "<su_optab>cvtf\t%h0, %h1"
3085 [(set_attr "type" "neon_int_to_fp_s")]
3086 )
3087
3088 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3089 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3090 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3091 [(mult:VDQF
3092 (match_operand:VDQF 1 "register_operand" "w")
3093 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3094 UNSPEC_FRINTZ)))]
3095 "TARGET_SIMD
3096 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3097 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3098 {
3099 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3100 char buf[64];
3101 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3102 output_asm_insn (buf, operands);
3103 return "";
3104 }
3105 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3106 )
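;; That is, a truncating float-to-int conversion whose input is scaled by
;; a power of two is fused into a single fixed-point convert. A hedged C
;; sketch for V4SF, assuming <arm_neon.h>:
;;   int32x4_t cvt (float32x4_t x)
;;   { return vcvtq_n_s32_f32 (x, 16); } /* fcvtzs v0.4s, v1.4s, #16 */
;; which corresponds, up to saturation, to "(int) (x[i] * 65536.0f)".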
3107
3108 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3109 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3110 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3111 [(match_operand:VHSDF 1 "register_operand")]
3112 UNSPEC_FRINTZ)))]
3113 "TARGET_SIMD"
3114 {})
3115
3116 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3117 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3118 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3119 [(match_operand:VHSDF 1 "register_operand")]
3120 UNSPEC_FRINTZ)))]
3121 "TARGET_SIMD"
3122 {})
3123
3124 (define_expand "ftrunc<VHSDF:mode>2"
3125 [(set (match_operand:VHSDF 0 "register_operand")
3126 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3127 UNSPEC_FRINTZ))]
3128 "TARGET_SIMD"
3129 {})
3130
3131 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3132 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3133 (FLOATUORS:VHSDF
3134 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3135 "TARGET_SIMD"
3136 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3137 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3138 )
3139
3140 ;; Conversions between vectors of floats and doubles.
3141 ;; Contains a mix of patterns to match standard pattern names
3142 ;; and those for intrinsics.
3143
3144 ;; Float widening operations.
3145
3146 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3147 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148 (float_extend:<VWIDE> (vec_select:<VHALF>
3149 (match_operand:VQ_HSF 1 "register_operand" "w")
3150 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3151 )))]
3152 "TARGET_SIMD"
3153 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3154 [(set_attr "type" "neon_fp_cvt_widen_s")]
3155 )
3156
3157 ;; Convert between fixed-point and floating-point (vector modes)
3158
3159 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3160 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3161 (unspec:<VHSDF:FCVT_TARGET>
3162 [(match_operand:VHSDF 1 "register_operand" "w")
3163 (match_operand:SI 2 "immediate_operand" "i")]
3164 FCVT_F2FIXED))]
3165 "TARGET_SIMD"
3166 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3167 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
3168 )
3169
3170 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3171 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3172 (unspec:<VDQ_HSDI:FCVT_TARGET>
3173 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3174 (match_operand:SI 2 "immediate_operand" "i")]
3175 FCVT_FIXED2F))]
3176 "TARGET_SIMD"
3177 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3178 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3179 )
3180
3181 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3182 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3183 ;; the meaning of HI and LO changes depending on the target endianness.
3184 ;; While elsewhere we map the higher numbered elements of a vector to
3185 ;; the lower architectural lanes of the vector, for these patterns we want
3186 ;; to always treat "hi" as referring to the higher architectural lanes.
3187 ;; Consequently, while the patterns below look inconsistent with our
3188 ;; other big-endian patterns, their behaviour is as required.
3189
3190 (define_expand "vec_unpacks_lo_<mode>"
3191 [(match_operand:<VWIDE> 0 "register_operand")
3192 (match_operand:VQ_HSF 1 "register_operand")]
3193 "TARGET_SIMD"
3194 {
3195 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3196 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3197 operands[1], p));
3198 DONE;
3199 }
3200 )
3201
3202 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3203 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3204 (float_extend:<VWIDE> (vec_select:<VHALF>
3205 (match_operand:VQ_HSF 1 "register_operand" "w")
3206 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3207 )))]
3208 "TARGET_SIMD"
3209 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3210 [(set_attr "type" "neon_fp_cvt_widen_s")]
3211 )
3212
3213 (define_expand "vec_unpacks_hi_<mode>"
3214 [(match_operand:<VWIDE> 0 "register_operand")
3215 (match_operand:VQ_HSF 1 "register_operand")]
3216 "TARGET_SIMD"
3217 {
3218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3219 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
3220 operands[1], p));
3221 DONE;
3222 }
3223 )
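
;; An illustrative sketch (hypothetical function): a float-to-double
;; widening loop can be vectorized through vec_unpacks_lo/vec_unpacks_hi,
;; i.e. an fcvtl/fcvtl2 pair per input vector:
;;
;;   void widen (double *out, const float *in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (double) in[i];
;;   }
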
3224 (define_insn "aarch64_float_extend_lo_<Vwide>"
3225 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3226 (float_extend:<VWIDE>
3227 (match_operand:VDF 1 "register_operand" "w")))]
3228 "TARGET_SIMD"
3229 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3230 [(set_attr "type" "neon_fp_cvt_widen_s")]
3231 )
3232
3233 ;; Float narrowing operations.
3234
3235 (define_insn "aarch64_float_trunc_rodd_df"
3236 [(set (match_operand:SF 0 "register_operand" "=w")
3237 (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3238 UNSPEC_FCVTXN))]
3239 "TARGET_SIMD"
3240 "fcvtxn\\t%s0, %d1"
3241 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3242 )
3243
3244 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3245 [(set (match_operand:V2SF 0 "register_operand" "=w")
3246 (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3247 UNSPEC_FCVTXN))]
3248 "TARGET_SIMD"
3249 "fcvtxn\\t%0.2s, %1.2d"
3250 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3251 )
3252
3253 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3254 [(set (match_operand:V4SF 0 "register_operand" "=w")
3255 (vec_concat:V4SF
3256 (match_operand:V2SF 1 "register_operand" "0")
3257 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3258 UNSPEC_FCVTXN)))]
3259 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3260 "fcvtxn2\\t%0.4s, %2.2d"
3261 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3262 )
3263
3264 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3265 [(set (match_operand:V4SF 0 "register_operand" "=w")
3266 (vec_concat:V4SF
3267 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3268 UNSPEC_FCVTXN)
3269 (match_operand:V2SF 1 "register_operand" "0")))]
3270 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3271 "fcvtxn2\\t%0.4s, %2.2d"
3272 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3273 )
3274
3275 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3276 [(match_operand:V4SF 0 "register_operand")
3277 (match_operand:V2SF 1 "register_operand")
3278 (match_operand:V2DF 2 "register_operand")]
3279 "TARGET_SIMD"
3280 {
3281 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3282 ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3283 : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3284 emit_insn (gen (operands[0], operands[1], operands[2]));
3285 DONE;
3286 }
3287 )
3288
3289 (define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
3290 [(set (match_operand:VDF 0 "register_operand" "=w")
3291 (float_truncate:VDF
3292 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3293 "TARGET_SIMD"
3294 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3295 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3296 )
3297
3298 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3299 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3300 (vec_concat:<VDBL>
3301 (match_operand:VDF 1 "register_operand" "0")
3302 (float_truncate:VDF
3303 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3304 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3305 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3306 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3307 )
3308
3309 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3310 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3311 (vec_concat:<VDBL>
3312 (float_truncate:VDF
3313 (match_operand:<VWIDE> 2 "register_operand" "w"))
3314 (match_operand:VDF 1 "register_operand" "0")))]
3315 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3316 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3317 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3318 )
3319
3320 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3321 [(match_operand:<VDBL> 0 "register_operand")
3322 (match_operand:VDF 1 "register_operand")
3323 (match_operand:<VWIDE> 2 "register_operand")]
3324 "TARGET_SIMD"
3325 {
3326 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3327 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3328 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3329 emit_insn (gen (operands[0], operands[1], operands[2]));
3330 DONE;
3331 }
3332 )
3333
3334 (define_expand "vec_pack_trunc_v2df"
3335 [(set (match_operand:V4SF 0 "register_operand")
3336 (vec_concat:V4SF
3337 (float_truncate:V2SF
3338 (match_operand:V2DF 1 "register_operand"))
3339 (float_truncate:V2SF
3340 (match_operand:V2DF 2 "register_operand"))
3341 ))]
3342 "TARGET_SIMD"
3343 {
3344 rtx tmp = gen_reg_rtx (V2SFmode);
3345 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3346 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3347
3348 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
3349 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3350 tmp, operands[hi]));
3351 DONE;
3352 }
3353 )
3354
3355 (define_expand "vec_pack_trunc_df"
3356 [(set (match_operand:V2SF 0 "register_operand")
3357 (vec_concat:V2SF
3358 (float_truncate:SF (match_operand:DF 1 "general_operand"))
3359 (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
3360 "TARGET_SIMD"
3361 {
3362 rtx tmp = gen_reg_rtx (V2SFmode);
3363 emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
3364 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
3365 DONE;
3366 }
3367 )
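
;; An illustrative sketch (hypothetical function): a double-to-float
;; narrowing loop can be vectorized through vec_pack_trunc_v2df, i.e.
;; fcvtn followed by fcvtn2 filling the two halves of one V4SF result:
;;
;;   void narrow (float *out, const double *in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];
;;   }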
3368
3369 ;; FP Max/Min
3370 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
3371 ;; expression like:
3372 ;; a = (b < c) ? b : c;
3373 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3374 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3375 ;; -ffast-math.
3376 ;;
3377 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3378 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3379 ;; operand will be returned when both operands are zero (i.e. they may not
3380 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
3381 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3382 ;; NaNs.
3383
3384 (define_insn "<su><maxmin><mode>3"
3385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3386 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3387 (match_operand:VHSDF 2 "register_operand" "w")))]
3388 "TARGET_SIMD"
3389 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3390 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3391 )
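
;; An illustrative sketch (hypothetical function): under -ffast-math (or
;; -ffinite-math-only -fno-signed-zeros) the ternary below is recognized
;; as MIN_EXPR and can map to the smin pattern above, i.e. fminnm:
;;
;;   void vmin (float *r, const float *a, const float *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] < b[i]) ? a[i] : b[i];
;;   }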
3392
3393 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3394 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3395 ;; which implement the IEEE fmax ()/fmin () functions.
3396 (define_insn "<fmaxmin><mode>3<vczle><vczbe>"
3397 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3398 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3399 (match_operand:VHSDF 2 "register_operand" "w")]
3400 FMAXMIN_UNS))]
3401 "TARGET_SIMD"
3402 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3403 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3404 )
3405
3406 ;; 'across lanes' add.
3407
3408 (define_insn "aarch64_faddp<mode><vczle><vczbe>"
3409 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3410 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3411 (match_operand:VHSDF 2 "register_operand" "w")]
3412 UNSPEC_FADDV))]
3413 "TARGET_SIMD"
3414 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3415 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3416 )
3417
3418 (define_insn "reduc_plus_scal_<mode>"
3419 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3420 (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
3421 UNSPEC_ADDV))]
3422 "TARGET_SIMD"
3423 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3424 [(set_attr "type" "neon_reduc_add<q>")]
3425 )
3426
3427 (define_insn "reduc_plus_scal_v2si"
3428 [(set (match_operand:SI 0 "register_operand" "=w")
3429 (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
3430 UNSPEC_ADDV))]
3431 "TARGET_SIMD"
3432 "addp\\t%0.2s, %1.2s, %1.2s"
3433 [(set_attr "type" "neon_reduc_add")]
3434 )
3435
3436 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3437 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3438 [(set (match_operand:GPI 0 "register_operand" "=w")
3439 (zero_extend:GPI
3440 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3441 UNSPEC_ADDV)))]
3442 "TARGET_SIMD"
3443 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3444 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3445 )
3446
3447 (define_insn "reduc_plus_scal_<mode>"
3448 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3449 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3450 UNSPEC_FADDV))]
3451 "TARGET_SIMD"
3452 "faddp\\t%<Vetype>0, %1.<Vtype>"
3453 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3454 )
3455
3456 (define_expand "reduc_plus_scal_v4sf"
3457 [(set (match_operand:SF 0 "register_operand")
3458 (unspec:SF [(match_operand:V4SF 1 "register_operand")]
3459 UNSPEC_FADDV))]
3460 "TARGET_SIMD"
3461 {
3462 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3463 rtx scratch = gen_reg_rtx (V4SFmode);
3464 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3465 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3466 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
3467 DONE;
3468 })
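
;; An illustrative sketch (hypothetical function): a vectorized sum
;; reduction ends in one of the reduc_plus patterns above, e.g. a final
;; addv of the vector accumulator for:
;;
;;   int sum (const int *a, int n)
;;   {
;;     int s = 0;
;;     for (int i = 0; i < n; i++)
;;       s += a[i];
;;     return s;
;;   }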
3469
3470 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
3471 ;; sign or zero-extends its elements.
3472 (define_insn "aarch64_<su>addlv<mode>"
3473 [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3474 (unspec:<VWIDE_S>
3475 [(ANY_EXTEND:<V2XWIDE>
3476 (match_operand:VDQV_L 1 "register_operand" "w"))]
3477 UNSPEC_ADDV))]
3478 "TARGET_SIMD"
3479 "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3480 [(set_attr "type" "neon_reduc_add<q>")]
3481 )
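
;; An illustrative sketch: the arm_neon.h widening reductions map directly
;; onto this pattern, e.g. uaddlv for:
;;
;;   #include <arm_neon.h>
;;   uint16_t sum8 (uint8x8_t v)
;;   {
;;     return vaddlv_u8 (v);   /* uaddlv h0, v0.8b */
;;   }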
3482
3483 ;; An ADDV over a vector PLUS of elements extracted and widened all from the
3484 ;; same vector is the same as an [SU]ADDLV above, so long as all the elements
3485 ;; of that vector are used. We can greatly simplify the RTL expression using
3486 ;; this splitter.
3487 (define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
3488 [(set (match_operand:<VWIDE_S> 0 "register_operand")
3489 (unspec:<VWIDE_S>
3490 [(plus:<VDBLW>
3491 (vec_select:<VDBLW>
3492 (ANY_EXTEND:<V2XWIDE>
3493 (match_operand:VDQV_L 1 "register_operand"))
3494 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3495 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3496 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
3497 UNSPEC_ADDV))]
3498 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3499 "#"
3500 "&& 1"
3501 [(set (match_dup 0)
3502 (unspec:<VWIDE_S>
3503 [(ANY_EXTEND:<V2XWIDE>
3504 (match_dup 1))]
3505 UNSPEC_ADDV))]
3506 {}
3507 )
3508
3509 ;; Similar to the above but for two-step zero-widening reductions.
3510 ;; We can push the outer zero_extend outside the ADDV unspec and make
3511 ;; use of the implicit high-part zeroing semantics of UADDLV to do it all
3512 ;; in a single instruction.
3513 (define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
3514 [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
3515 (unspec:<VWIDE2X_S>
3516 [(zero_extend:<VQUADW>
3517 (plus:<VDBLW>
3518 (vec_select:<VDBLW>
3519 (zero_extend:<V2XWIDE>
3520 (match_operand:VDQQH 1 "register_operand" "w"))
3521 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3522 (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
3523 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
3524 UNSPEC_ADDV))]
3525 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3526 "#"
3527 "&& 1"
3528 [(set (match_dup 0)
3529 (zero_extend:<VWIDE2X_S>
3530 (unspec:<VWIDE_S>
3531 [(zero_extend:<V2XWIDE>
3532 (match_dup 1))]
3533 UNSPEC_ADDV)))]
3534 {}
3535 )
3536
3537 ;; Zero-extending version of the above. As these intrinsics produce a scalar
3538 ;; value that may be used by further intrinsics, we want to avoid moving the
3539 ;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.
3540
3541 (define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
3542 [(set (match_operand:GPI 0 "register_operand" "=w")
3543 (zero_extend:GPI
3544 (unspec:<VWIDE_S>
3545 [(ANY_EXTEND:<VDQV_L:V2XWIDE>
3546 (match_operand:VDQV_L 1 "register_operand" "w"))]
3547 UNSPEC_ADDV)))]
3548 "TARGET_SIMD
3549 && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
3550 "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
3551 [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
3552 )
3553
3554 (define_expand "aarch64_<su>addlp<mode>"
3555 [(set (match_operand:<VDBLW> 0 "register_operand")
3556 (plus:<VDBLW>
3557 (vec_select:<VDBLW>
3558 (ANY_EXTEND:<V2XWIDE>
3559 (match_operand:VDQV_L 1 "register_operand"))
3560 (match_dup 2))
3561 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3562 (match_dup 3))))]
3563 "TARGET_SIMD"
3564 {
3565 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
3566 operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
3567 operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
3568 }
3569 )
3570
3571 (define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
3572 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3573 (plus:<VDBLW>
3574 (vec_select:<VDBLW>
3575 (ANY_EXTEND:<V2XWIDE>
3576 (match_operand:VDQV_L 1 "register_operand" "w"))
3577 (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
3578 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3579 (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
3580 "TARGET_SIMD
3581 && !rtx_equal_p (operands[2], operands[3])"
3582 "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3583 [(set_attr "type" "neon_reduc_add<q>")]
3584 )
3585
3586 (define_insn "clrsb<mode>2<vczle><vczbe>"
3587 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3588 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3589 "TARGET_SIMD"
3590 "cls\\t%0.<Vtype>, %1.<Vtype>"
3591 [(set_attr "type" "neon_cls<q>")]
3592 )
3593
3594 (define_insn "clz<mode>2<vczle><vczbe>"
3595 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3596 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3597 "TARGET_SIMD"
3598 "clz\\t%0.<Vtype>, %1.<Vtype>"
3599 [(set_attr "type" "neon_cls<q>")]
3600 )
3601
3602 (define_insn "popcount<mode>2<vczle><vczbe>"
3603 [(set (match_operand:VB 0 "register_operand" "=w")
3604 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3605 "TARGET_SIMD"
3606 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3607 [(set_attr "type" "neon_cnt<q>")]
3608 )
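
;; An illustrative sketch (assuming the generic AdvSIMD expansion): scalar
;; population count uses cnt on the byte vector and then the zero-extending
;; ADDV pattern above (fmov; cnt v0.8b; addv b0, v0.8b; fmov back):
;;
;;   int popcount64 (unsigned long long x)
;;   {
;;     return __builtin_popcountll (x);
;;   }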
3609
3610 ;; 'across lanes' max and min ops.
3611
3612 ;; Template for outputting a scalar, so we can create __builtins which can be
3613 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
3614 (define_expand "reduc_<optab>_scal_<mode>"
3615 [(match_operand:<VEL> 0 "register_operand")
3616 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3617 FMAXMINV)]
3618 "TARGET_SIMD"
3619 {
3620 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3621 rtx scratch = gen_reg_rtx (<MODE>mode);
3622 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3623 operands[1]));
3624 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3625 DONE;
3626 }
3627 )
3628
3629 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3630 [(match_operand:<VEL> 0 "register_operand")
3631 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3632 FMAXMINNMV)]
3633 "TARGET_SIMD"
3634 {
3635 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3636 DONE;
3637 }
3638 )
3639
3640 ;; Likewise for integer cases, signed and unsigned.
3641 (define_expand "reduc_<optab>_scal_<mode>"
3642 [(match_operand:<VEL> 0 "register_operand")
3643 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3644 MAXMINV)]
3645 "TARGET_SIMD"
3646 {
3647 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3648 rtx scratch = gen_reg_rtx (<MODE>mode);
3649 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3650 operands[1]));
3651 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3652 DONE;
3653 }
3654 )
3655
3656 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3657 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3658 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3659 MAXMINV))]
3660 "TARGET_SIMD"
3661 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3662 [(set_attr "type" "neon_reduc_minmax<q>")]
3663 )
3664
3665 (define_insn "aarch64_reduc_<optab>_internalv2si"
3666 [(set (match_operand:V2SI 0 "register_operand" "=w")
3667 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3668 MAXMINV))]
3669 "TARGET_SIMD"
3670 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3671 [(set_attr "type" "neon_reduc_minmax")]
3672 )
3673
3674 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3675 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3676 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3677 FMAXMINV))]
3678 "TARGET_SIMD"
3679 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3680 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3681 )
3682
3683 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3684 ;; allocation.
3685 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3686 ;; to select.
3687 ;;
3688 ;; Thus our BSL is of the form:
3689 ;; op0 = bsl (mask, op2, op3)
3690 ;; We can use any of:
3691 ;;
3692 ;;   if (op0 = mask)
3693 ;;     bsl mask, op2, op3
3694 ;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
3695 ;;     bit op0, op2, mask
3696 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
3697 ;;     bif op0, op3, mask
3698 ;;
3699 ;; The aarch64_simd_bsl<mode> expander expands to this pattern.
3700 ;; Some forms of straight-line code may generate the equivalent form
3701 ;; in *aarch64_simd_bsl<mode>_alt.
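;;
;; For reference, the xor/and/xor RTL below implements the usual bitwise
;; select identity.  An illustrative scalar sketch (hypothetical function):
;;
;;   unsigned long long bsel (unsigned long long mask,
;;                            unsigned long long a, unsigned long long b)
;;   {
;;     /* (mask & a) | (~mask & b), rewritten as ((a ^ b) & mask) ^ b.  */
;;     return ((a ^ b) & mask) ^ b;
;;   }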
3702
3703 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3704 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3705 (xor:VDQ_I
3706 (and:VDQ_I
3707 (xor:VDQ_I
3708 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
3709 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
3710 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3711 (match_dup:<V_INT_EQUIV> 3)
3712 ))]
3713 "TARGET_SIMD"
3714 "@
3715 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3716 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3717 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
3718 [(set_attr "type" "neon_bsl<q>")]
3719 )
3720
3721 ;; We need this form in addition to the above pattern to match the case
3722 ;; when combine tries merging three insns such that the second operand of
3723 ;; the outer XOR matches the second operand of the inner XOR rather than
3724 ;; the first. The two are equivalent but since recog doesn't try all
3725 ;; permutations of commutative operations, we have to have a separate pattern.
3726
3727 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3729 (xor:VDQ_I
3730 (and:VDQ_I
3731 (xor:VDQ_I
3732 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
3733 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
3734 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3735 (match_dup:<V_INT_EQUIV> 2)))]
3736 "TARGET_SIMD"
3737 "@
3738 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3739 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3740 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
3741 [(set_attr "type" "neon_bsl<q>")]
3742 )
3743
3744 ;; DImode is special: we want to avoid computing, in the vector registers,
3745 ;; operations that are more naturally computed in general purpose
3746 ;; registers.  If we do that, we need to move all three operands from general
3747 ;; purpose registers to vector registers, then back again. However, we
3748 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3749 ;; optimizations based on the component operations of a BSL.
3750 ;;
3751 ;; That means we need a splitter back to the individual operations, if they
3752 ;; would be better calculated on the integer side.
3753
3754 (define_insn_and_split "aarch64_simd_bsldi_internal"
3755 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3756 (xor:DI
3757 (and:DI
3758 (xor:DI
3759 (match_operand:DI 3 "register_operand" "w,0,w,r")
3760 (match_operand:DI 2 "register_operand" "w,w,0,r"))
3761 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3762 (match_dup:DI 3)
3763 ))]
3764 "TARGET_SIMD"
3765 "@
3766 bsl\\t%0.8b, %2.8b, %3.8b
3767 bit\\t%0.8b, %2.8b, %1.8b
3768 bif\\t%0.8b, %3.8b, %1.8b
3769 #"
3770 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3771 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3772 {
3773 /* Split back to individual operations. If we're before reload, and
3774 able to create a temporary register, do so. If we're after reload,
3775 we've got an early-clobber destination register, so use that.
3776 Otherwise, we can't create pseudos and we can't yet guarantee that
3777 operands[0] is safe to write, so FAIL to split. */
3778
3779 rtx scratch;
3780 if (reload_completed)
3781 scratch = operands[0];
3782 else if (can_create_pseudo_p ())
3783 scratch = gen_reg_rtx (DImode);
3784 else
3785 FAIL;
3786
3787 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3788 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3789 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3790 DONE;
3791 }
3792 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3793 (set_attr "length" "4,4,4,12")]
3794 )
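
;; For reference, the split above emits the select as three scalar
;; instructions (eor/and/eor), avoiding a round trip through the vector
;; registers.  Pseudo-C of the emitted sequence:
;;
;;   scratch = op2 ^ op3;       /* gen_xordi3 */
;;   scratch = scratch & op1;   /* gen_anddi3 */
;;   op0     = scratch ^ op3;   /* gen_xordi3 */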
3795
3796 (define_insn_and_split "aarch64_simd_bsldi_alt"
3797 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3798 (xor:DI
3799 (and:DI
3800 (xor:DI
3801 (match_operand:DI 3 "register_operand" "w,w,0,r")
3802 (match_operand:DI 2 "register_operand" "w,0,w,r"))
3803 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3804 (match_dup:DI 2)
3805 ))]
3806 "TARGET_SIMD"
3807 "@
3808 bsl\\t%0.8b, %3.8b, %2.8b
3809 bit\\t%0.8b, %3.8b, %1.8b
3810 bif\\t%0.8b, %2.8b, %1.8b
3811 #"
3812 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3813 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3814 {
3815 /* Split back to individual operations. If we're before reload, and
3816 able to create a temporary register, do so. If we're after reload,
3817 we've got an early-clobber destination register, so use that.
3818 Otherwise, we can't create pseudos and we can't yet guarantee that
3819 operands[0] is safe to write, so FAIL to split. */
3820
3821 rtx scratch;
3822 if (reload_completed)
3823 scratch = operands[0];
3824 else if (can_create_pseudo_p ())
3825 scratch = gen_reg_rtx (DImode);
3826 else
3827 FAIL;
3828
3829 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3830 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3831 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3832 DONE;
3833 }
3834 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3835 (set_attr "length" "4,4,4,12")]
3836 )
3837
3838 (define_expand "aarch64_simd_bsl<mode>"
3839 [(match_operand:VALLDIF 0 "register_operand")
3840 (match_operand:<V_INT_EQUIV> 1 "register_operand")
3841 (match_operand:VALLDIF 2 "register_operand")
3842 (match_operand:VALLDIF 3 "register_operand")]
3843 "TARGET_SIMD"
3844 {
3845 /* We can't alias operands together if they have different modes. */
3846 rtx tmp = operands[0];
3847 if (FLOAT_MODE_P (<MODE>mode))
3848 {
3849 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3850 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3851 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3852 }
3853 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3854 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3855 operands[1],
3856 operands[2],
3857 operands[3]));
3858 if (tmp != operands[0])
3859 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3860
3861 DONE;
3862 })
3863
3864 (define_expand "vcond_mask_<mode><v_int_equiv>"
3865 [(match_operand:VALLDI 0 "register_operand")
3866 (match_operand:VALLDI 1 "nonmemory_operand")
3867 (match_operand:VALLDI 2 "nonmemory_operand")
3868 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3869 "TARGET_SIMD"
3870 {
3871 /* If we have (a = (P) ? -1 : 0),
3872 then we can simply move the generated mask (the result must be int).  */
3873 if (operands[1] == CONSTM1_RTX (<MODE>mode)
3874 && operands[2] == CONST0_RTX (<MODE>mode))
3875 emit_move_insn (operands[0], operands[3]);
3876 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
3877 else if (operands[1] == CONST0_RTX (<MODE>mode)
3878 && operands[2] == CONSTM1_RTX (<MODE>mode))
3879 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3880 else
3881 {
3882 if (!REG_P (operands[1]))
3883 operands[1] = force_reg (<MODE>mode, operands[1]);
3884 if (!REG_P (operands[2]))
3885 operands[2] = force_reg (<MODE>mode, operands[2]);
3886 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3887 operands[1], operands[2]));
3888 }
3889
3890 DONE;
3891 })
3892
3893 ;; Patterns comparing two vectors to produce a mask.
3894
3895 (define_expand "vec_cmp<mode><mode>"
3896 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3897 (match_operator 1 "comparison_operator"
3898 [(match_operand:VSDQ_I_DI 2 "register_operand")
3899 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3900 "TARGET_SIMD"
3901 {
3902 rtx mask = operands[0];
3903 enum rtx_code code = GET_CODE (operands[1]);
3904
3905 switch (code)
3906 {
3907 case NE:
3908 case LE:
3909 case LT:
3910 case GE:
3911 case GT:
3912 case EQ:
3913 if (operands[3] == CONST0_RTX (<MODE>mode))
3914 break;
3915
3916 /* Fall through. */
3917 default:
3918 if (!REG_P (operands[3]))
3919 operands[3] = force_reg (<MODE>mode, operands[3]);
3920
3921 break;
3922 }
3923
3924 switch (code)
3925 {
3926 case LT:
3927 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3928 break;
3929
3930 case GE:
3931 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3932 break;
3933
3934 case LE:
3935 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3936 break;
3937
3938 case GT:
3939 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3940 break;
3941
3942 case LTU:
3943 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3944 break;
3945
3946 case GEU:
3947 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3948 break;
3949
3950 case LEU:
3951 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3952 break;
3953
3954 case GTU:
3955 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3956 break;
3957
3958 case NE:
3959 /* Handle NE as !EQ. */
3960 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3961 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
3962 break;
3963
3964 case EQ:
3965 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3966 break;
3967
3968 default:
3969 gcc_unreachable ();
3970 }
3971
3972 DONE;
3973 })
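
;; An illustrative sketch (hypothetical function): an elementwise integer
;; comparison producing an all-ones/all-zeros mask goes through the
;; expander above, e.g. cmgt for signed greater-than:
;;
;;   void cmp_mask (int *r, const int *a, const int *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] > b[i]) ? -1 : 0;
;;   }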
3974
3975 (define_expand "vec_cmp<mode><v_int_equiv>"
3976 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3977 (match_operator 1 "comparison_operator"
3978 [(match_operand:VDQF 2 "register_operand")
3979 (match_operand:VDQF 3 "nonmemory_operand")]))]
3980 "TARGET_SIMD"
3981 {
3982 int use_zero_form = 0;
3983 enum rtx_code code = GET_CODE (operands[1]);
3984 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3985
3986 rtx (*comparison) (rtx, rtx, rtx) = NULL;
3987
3988 switch (code)
3989 {
3990 case LE:
3991 case LT:
3992 case GE:
3993 case GT:
3994 case EQ:
3995 if (operands[3] == CONST0_RTX (<MODE>mode))
3996 {
3997 use_zero_form = 1;
3998 break;
3999 }
4000 /* Fall through. */
4001 default:
4002 if (!REG_P (operands[3]))
4003 operands[3] = force_reg (<MODE>mode, operands[3]);
4004
4005 break;
4006 }
4007
4008 switch (code)
4009 {
4010 case LT:
4011 if (use_zero_form)
4012 {
4013 comparison = gen_aarch64_cmlt<mode>;
4014 break;
4015 }
4016 /* Fall through. */
4017 case UNLT:
4018 std::swap (operands[2], operands[3]);
4019 /* Fall through. */
4020 case UNGT:
4021 case GT:
4022 comparison = gen_aarch64_cmgt<mode>;
4023 break;
4024 case LE:
4025 if (use_zero_form)
4026 {
4027 comparison = gen_aarch64_cmle<mode>;
4028 break;
4029 }
4030 /* Fall through. */
4031 case UNLE:
4032 std::swap (operands[2], operands[3]);
4033 /* Fall through. */
4034 case UNGE:
4035 case GE:
4036 comparison = gen_aarch64_cmge<mode>;
4037 break;
4038 case NE:
4039 case EQ:
4040 comparison = gen_aarch64_cmeq<mode>;
4041 break;
4042 case UNEQ:
4043 case ORDERED:
4044 case UNORDERED:
4045 case LTGT:
4046 break;
4047 default:
4048 gcc_unreachable ();
4049 }
4050
4051 switch (code)
4052 {
4053 case UNGE:
4054 case UNGT:
4055 case UNLE:
4056 case UNLT:
4057 {
4058 /* All of the above must not raise any FP exceptions. Thus we first
4059 check each operand for NaNs and force any elements containing NaN to
4060 zero before using them in the compare.
4061 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4062 (cm<cc> (isnan (a) ? 0.0 : a,
4063 isnan (b) ? 0.0 : b))
4064 We use the following transformations for doing the comparisons:
4065 a UNGE b -> a GE b
4066 a UNGT b -> a GT b
4067 a UNLE b -> b GE a
4068 a UNLT b -> b GT a. */
4069
4070 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4071 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4072 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4073 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4074 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4075 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4076 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4077 lowpart_subreg (<V_INT_EQUIV>mode,
4078 operands[2],
4079 <MODE>mode)));
4080 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4081 lowpart_subreg (<V_INT_EQUIV>mode,
4082 operands[3],
4083 <MODE>mode)));
4084 gcc_assert (comparison != NULL);
4085 emit_insn (comparison (operands[0],
4086 lowpart_subreg (<MODE>mode,
4087 tmp0, <V_INT_EQUIV>mode),
4088 lowpart_subreg (<MODE>mode,
4089 tmp1, <V_INT_EQUIV>mode)));
4090 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4091 }
4092 break;
4093
4094 case LT:
4095 case LE:
4096 case GT:
4097 case GE:
4098 case EQ:
4099 case NE:
4100 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4101 As a LT b <=> b GT a and a LE b <=> b GE a.  Our transformations are:
4102 a GE b -> a GE b
4103 a GT b -> a GT b
4104 a LE b -> b GE a
4105 a LT b -> b GT a
4106 a EQ b -> a EQ b
4107 a NE b -> ~(a EQ b) */
4108 gcc_assert (comparison != NULL);
4109 emit_insn (comparison (operands[0], operands[2], operands[3]));
4110 if (code == NE)
4111 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4112 break;
4113
4114 case LTGT:
4115 /* LTGT is not guaranteed not to generate an FP exception.  So let's
4116 go the faster way: ((a > b) || (b > a)).  */
4117 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4118 operands[2], operands[3]));
4119 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4120 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4121 break;
4122
4123 case ORDERED:
4124 case UNORDERED:
4125 case UNEQ:
4126 /* cmeq (a, a) & cmeq (b, b). */
4127 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4128 operands[2], operands[2]));
4129 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4130 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4131
4132 if (code == UNORDERED)
4133 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4134 else if (code == UNEQ)
4135 {
4136 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4137 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
4138 }
4139 break;
4140
4141 default:
4142 gcc_unreachable ();
4143 }
4144
4145 DONE;
4146 })
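
;; An illustrative sketch (hypothetical function): for LT against a
;; non-zero operand the expander above swaps the operands and emits
;; fcmgt, following the "a LT b -> b GT a" transformation:
;;
;;   void fcmp_mask (unsigned *r, const float *a, const float *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] < b[i]) ? -1u : 0u;
;;   }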
4147
4148 (define_expand "vec_cmpu<mode><mode>"
4149 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4150 (match_operator 1 "comparison_operator"
4151 [(match_operand:VSDQ_I_DI 2 "register_operand")
4152 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4153 "TARGET_SIMD"
4154 {
4155 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4156 operands[2], operands[3]));
4157 DONE;
4158 })
4159
4160 (define_expand "vcond<mode><mode>"
4161 [(set (match_operand:VALLDI 0 "register_operand")
4162 (if_then_else:VALLDI
4163 (match_operator 3 "comparison_operator"
4164 [(match_operand:VALLDI 4 "register_operand")
4165 (match_operand:VALLDI 5 "nonmemory_operand")])
4166 (match_operand:VALLDI 1 "nonmemory_operand")
4167 (match_operand:VALLDI 2 "nonmemory_operand")))]
4168 "TARGET_SIMD"
4169 {
4170 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4171 enum rtx_code code = GET_CODE (operands[3]);
4172
4173 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4174 to EQ and swap operands 1 and 2 in order to avoid the additional
4175 NOT instruction.  */
4176 if (code == NE)
4177 {
4178 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4179 operands[4], operands[5]);
4180 std::swap (operands[1], operands[2]);
4181 }
4182 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4183 operands[4], operands[5]));
4184 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4185 operands[2], mask));
4186
4187 DONE;
4188 })
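
;; An illustrative sketch (hypothetical function) of the NE handling
;; above: the select below can become cmeq + bsl with the select arms
;; swapped, rather than cmeq + not + bsl:
;;
;;   void sel (int *r, const int *a, const int *b,
;;             const int *x, const int *y, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (a[i] != b[i]) ? x[i] : y[i];
;;   }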
4189
4190 (define_expand "vcond<v_cmp_mixed><mode>"
4191 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4192 (if_then_else:<V_cmp_mixed>
4193 (match_operator 3 "comparison_operator"
4194 [(match_operand:VDQF_COND 4 "register_operand")
4195 (match_operand:VDQF_COND 5 "nonmemory_operand")])
4196 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4197 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4198 "TARGET_SIMD"
4199 {
4200 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4201 enum rtx_code code = GET_CODE (operands[3]);
4202
4203 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4204 to EQ and swap operands 1 and 2 in order to avoid the additional
4205 NOT instruction.  */
4206 if (code == NE)
4207 {
4208 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4209 operands[4], operands[5]);
4210 std::swap (operands[1], operands[2]);
4211 }
4212 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4213 operands[4], operands[5]));
4214 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4215 operands[0], operands[1],
4216 operands[2], mask));
4217
4218 DONE;
4219 })
4220
4221 (define_expand "vcondu<mode><mode>"
4222 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4223 (if_then_else:VSDQ_I_DI
4224 (match_operator 3 "comparison_operator"
4225 [(match_operand:VSDQ_I_DI 4 "register_operand")
4226 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4227 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4228 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4229 "TARGET_SIMD"
4230 {
4231 rtx mask = gen_reg_rtx (<MODE>mode);
4232 enum rtx_code code = GET_CODE (operands[3]);
4233
4234 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4235 to EQ and swap operands 1 and 2 in order to avoid the additional
4236 NOT instruction.  */
4237 if (code == NE)
4238 {
4239 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4240 operands[4], operands[5]);
4241 std::swap (operands[1], operands[2]);
4242 }
4243 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4244 operands[4], operands[5]));
4245 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4246 operands[2], mask));
4247 DONE;
4248 })
4249
4250 (define_expand "vcondu<mode><v_cmp_mixed>"
4251 [(set (match_operand:VDQF 0 "register_operand")
4252 (if_then_else:VDQF
4253 (match_operator 3 "comparison_operator"
4254 [(match_operand:<V_cmp_mixed> 4 "register_operand")
4255 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4256 (match_operand:VDQF 1 "nonmemory_operand")
4257 (match_operand:VDQF 2 "nonmemory_operand")))]
4258 "TARGET_SIMD"
4259 {
4260 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4261 enum rtx_code code = GET_CODE (operands[3]);
4262
4263 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4264 to EQ and swap operands 1 and 2 in order to avoid the additional
4265 NOT instruction.  */
4266 if (code == NE)
4267 {
4268 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4269 operands[4], operands[5]);
4270 std::swap (operands[1], operands[2]);
4271 }
4272 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4273 mask, operands[3],
4274 operands[4], operands[5]));
4275 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4276 operands[2], mask));
4277 DONE;
4278 })
4279
4280 ;; Patterns for AArch64 SIMD Intrinsics.
4281
4282 ;; Lane extraction with sign extension to general purpose register.
4283 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4284 [(set (match_operand:GPI 0 "register_operand" "=r")
4285 (sign_extend:GPI
4286 (vec_select:<VDQQH:VEL>
4287 (match_operand:VDQQH 1 "register_operand" "w")
4288 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4289 "TARGET_SIMD"
4290 {
4291 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4292 INTVAL (operands[2]));
4293 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4294 }
4295 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4296 )
4297
4298 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4299 [(set (match_operand:GPI 0 "register_operand" "=r")
4300 (zero_extend:GPI
4301 (vec_select:<VDQQH:VEL>
4302 (match_operand:VDQQH 1 "register_operand" "w")
4303 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4304 "TARGET_SIMD"
4305 {
4306 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4307 INTVAL (operands[2]));
4308 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4309 }
4310 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4311 )
4312
4313 ;; Lane extraction of a value; neither sign nor zero extension
4314 ;; is guaranteed, so the upper bits should be considered undefined.
4315 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4316 ;; Extracting lane zero is split into a simple move when it is between SIMD
4317 ;; registers or a store.
4318 (define_insn_and_split "aarch64_get_lane<mode>"
4319 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4320 (vec_select:<VEL>
4321 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4322 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4323 "TARGET_SIMD"
4324 {
4325 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4326 switch (which_alternative)
4327 {
4328 case 0:
4329 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4330 case 1:
4331 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4332 case 2:
4333 return "st1\\t{%1.<Vetype>}[%2], %0";
4334 default:
4335 gcc_unreachable ();
4336 }
4337 }
4338 "&& reload_completed
4339 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4340 [(set (match_dup 0) (match_dup 1))]
4341 {
4342 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4343 }
4344 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4345 )
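
;; An illustrative sketch (hypothetical functions): subscripting a GCC
;; vector extension value extracts a lane through the pattern above, and
;; lane zero splits into a plain move after reload:
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   int lane1 (v4si v) { return v[1]; }  /* e.g. umov w0, v0.s[1] */
;;   int lane0 (v4si v) { return v[0]; }  /* e.g. a simple fmov w0, s0 */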
4346
4347 (define_insn "*aarch64_get_high<mode>"
4348 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4349 (vec_select:<VEL>
4350 (match_operand:VQ_2E 1 "register_operand" "w")
4351 (parallel [(match_operand:SI 2 "immediate_operand")])))]
4352 "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4353 "fmov\t%0, %1.d[1]"
4354 [(set_attr "type" "f_mrc")]
4355 )
4356
4357 (define_insn "load_pair_lanes<mode>"
4358 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4359 (vec_concat:<VDBL>
4360 (match_operand:VDCSIF 1 "memory_operand" "Utq")
4361 (match_operand:VDCSIF 2 "memory_operand" "m")))]
4362 "TARGET_FLOAT
4363 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4364 "ldr\\t%<single_dtype>0, %1"
4365 [(set_attr "type" "neon_load1_1reg<dblq>")]
4366 )
4367
4368 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4369 ;; below. The reason for having both of them is that the alternatives of
4370 ;; the later patterns do not have consistent register preferences: the STP
4371 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4372 ;; the GPR form is more natural for scalar integers) whereas the other
4373 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4374 ;;
4375 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4376 ;; which the destination was always memory. On the other hand, expressing
4377 ;; the true preferences makes GPRs seem more palatable than they really are
4378 ;; for register destinations.
4379 ;;
4380 ;; Despite that, we do still want the general form to have STP alternatives,
4381 ;; in order to handle cases where a register destination is spilled.
4382 ;;
4383 ;; The best compromise therefore seemed to be to have a dedicated STP
4384 ;; pattern to catch cases in which the destination was always memory.
4385 ;; This dedicated pattern must come first.
4386
4387 (define_insn "store_pair_lanes<mode>"
4388 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
4389 (vec_concat:<VDBL>
4390 (match_operand:VDCSIF 1 "register_operand" "w, r")
4391 (match_operand:VDCSIF 2 "register_operand" "w, r")))]
4392 "TARGET_FLOAT"
4393 "@
4394 stp\t%<single_type>1, %<single_type>2, %y0
4395 stp\t%<single_wx>1, %<single_wx>2, %y0"
4396 [(set_attr "type" "neon_stp, store_16")]
4397 )
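
;; An illustrative sketch (hypothetical function, and only one of several
;; routes to STP): two adjacent FP stores may end up as a single
;; stp d0, d1, [x0], whether via this pattern or separate pair fusion:
;;
;;   void store_pair (double *p, double a, double b)
;;   {
;;     p[0] = a;
;;     p[1] = b;
;;   }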
4398
4399 ;; Form a vector whose least significant half comes from operand 1 and whose
4400 ;; most significant half comes from operand 2. The register alternatives
4401 ;; tie the least significant half to the same register as the destination,
4402 ;; so that only the other half needs to be handled explicitly. For the
4403 ;; reasons given above, the STP alternatives use ? for constraints that
4404 ;; the register alternatives either don't accept or themselves disparage.
4405
4406 (define_insn "*aarch64_combine_internal<mode>"
4407 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
4408 (vec_concat:<VDBL>
4409 (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")
4410 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))]
4411 "TARGET_FLOAT
4412 && !BYTES_BIG_ENDIAN
4413 && (register_operand (operands[0], <VDBL>mode)
4414 || register_operand (operands[2], <MODE>mode))"
4415 "@
4416 ins\t%0.<single_type>[1], %2.<single_type>[0]
4417 ins\t%0.<single_type>[1], %<single_wx>2
4418 fmov\t%0.d[1], %2
4419 ld1\t{%0.<single_type>}[1], %2
4420 stp\t%<single_type>1, %<single_type>2, %y0
4421 stp\t%<single_wx>1, %<single_wx>2, %y0"
4422 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr,
4423 neon_load1_one_lane<dblq>, neon_stp, store_16")
4424 (set_attr "arch" "simd,simd,*,simd,*,*")]
4425 )
4426
4427 (define_insn "*aarch64_combine_internal_be<mode>"
4428 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
4429 (vec_concat:<VDBL>
4430 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r")
4431 (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")))]
4432 "TARGET_FLOAT
4433 && BYTES_BIG_ENDIAN
4434 && (register_operand (operands[0], <VDBL>mode)
4435 || register_operand (operands[2], <MODE>mode))"
4436 "@
4437 ins\t%0.<single_type>[1], %2.<single_type>[0]
4438 ins\t%0.<single_type>[1], %<single_wx>2
4439 fmov\t%0.d[1], %2
4440 ld1\t{%0.<single_type>}[1], %2
4441 stp\t%<single_type>2, %<single_type>1, %y0
4442 stp\t%<single_wx>2, %<single_wx>1, %y0"
4443 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr, neon_load1_one_lane<dblq>, neon_stp, store_16")
4444 (set_attr "arch" "simd,simd,*,simd,*,*")]
4445 )
4446
4447 ;; In this insn, operand 1 supplies the low half and operand 2 the high half
4448 ;; of the destination vector.
4449
4450 (define_insn "*aarch64_combinez<mode>"
4451 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4452 (vec_concat:<VDBL>
4453 (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
4454 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4455 "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4456 "@
4457 fmov\\t%<single_type>0, %<single_type>1
4458 fmov\t%<single_type>0, %<single_wx>1
4459 ldr\\t%<single_type>0, %1"
4460 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
4461 )
4462
4463 (define_insn "*aarch64_combinez_be<mode>"
4464 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4465 (vec_concat:<VDBL>
4466 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4467 (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
4468 "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4469 "@
4470 fmov\\t%<single_type>0, %<single_type>1
4471 fmov\t%<single_type>0, %<single_wx>1
4472 ldr\\t%<single_type>0, %1"
4473 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
4474 )
4475
4476 ;; Form a vector whose first half (in array order) comes from operand 1
4477 ;; and whose second half (in array order) comes from operand 2.
4478 ;; This operand order follows the RTL vec_concat operation.
4479 (define_expand "@aarch64_vec_concat<mode>"
4480 [(set (match_operand:<VDBL> 0 "register_operand")
4481 (vec_concat:<VDBL>
4482 (match_operand:VDCSIF 1 "general_operand")
4483 (match_operand:VDCSIF 2 "general_operand")))]
4484 "TARGET_FLOAT"
4485 {
4486 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4487 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4488
4489 if (MEM_P (operands[1])
4490 && MEM_P (operands[2])
4491 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4492 /* Use load_pair_lanes<mode>. */
4493 ;
4494 else if (operands[hi] == CONST0_RTX (<MODE>mode))
4495 {
4496 /* Use *aarch64_combinez<mode>. */
4497 if (!nonimmediate_operand (operands[lo], <MODE>mode))
4498 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4499 }
4500 else
4501 {
4502 /* Use *aarch64_combine_internal<mode>. */
4503 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4504 if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4505 {
4506 if (MEM_P (operands[hi]))
4507 {
4508 rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4509 operands[hi] = replace_equiv_address (operands[hi], addr);
4510 }
4511 else
4512 operands[hi] = force_reg (<MODE>mode, operands[hi]);
4513 }
4514 }
4515 })
4516
4517 ;; Form a vector whose least significant half comes from operand 1 and whose
4518 ;; most significant half comes from operand 2. This operand order follows
4519 ;; arm_neon.h vcombine* intrinsics.
4520 (define_expand "aarch64_combine<mode>"
4521 [(match_operand:<VDBL> 0 "register_operand")
4522 (match_operand:VDC 1 "general_operand")
4523 (match_operand:VDC 2 "general_operand")]
4524 "TARGET_FLOAT"
4525 {
4526 if (BYTES_BIG_ENDIAN)
4527 std::swap (operands[1], operands[2]);
4528 emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4529 operands[2]));
4530 DONE;
4531 }
4532 )
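
;; An illustrative sketch: the arm_neon.h vcombine* intrinsics map onto
;; the expander above, which takes its operands in low-then-high order
;; and performs the endianness swap itself:
;;
;;   #include <arm_neon.h>
;;   int32x4_t combine (int32x2_t lo, int32x2_t hi)
;;   {
;;     return vcombine_s32 (lo, hi);
;;   }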
4533
4534 ;; <su><addsub>l<q>.
4535
4536 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4537 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4538 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4539 (match_operand:VQW 1 "register_operand" "w")
4540 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4541 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4542 (match_operand:VQW 2 "register_operand" "w")
4543 (match_dup 3)))))]
4544 "TARGET_SIMD"
4545 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4546 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4547 )
4548
4549 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4550 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4551 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4552 (match_operand:VQW 1 "register_operand" "w")
4553 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4554 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4555 (match_operand:VQW 2 "register_operand" "w")
4556 (match_dup 3)))))]
4557 "TARGET_SIMD"
4558 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4559 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4560 )
4561
4562 (define_expand "vec_widen_<su>add_lo_<mode>"
4563 [(match_operand:<VWIDE> 0 "register_operand")
4564 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4565 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4566 "TARGET_SIMD"
4567 {
4568 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4569 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4570 operands[2], p));
4571 DONE;
4572 })
4573
4574 (define_expand "vec_widen_<su>add_hi_<mode>"
4575 [(match_operand:<VWIDE> 0 "register_operand")
4576 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4577 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4578 "TARGET_SIMD"
4579 {
4580 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4581 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4582 operands[2], p));
4583 DONE;
4584 })
4585
4586 (define_expand "vec_widen_<su>sub_lo_<mode>"
4587 [(match_operand:<VWIDE> 0 "register_operand")
4588 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4589 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4590 "TARGET_SIMD"
4591 {
4592 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4593 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4594 operands[2], p));
4595 DONE;
4596 })
4597
4598 (define_expand "vec_widen_<su>sub_hi_<mode>"
4599 [(match_operand:<VWIDE> 0 "register_operand")
4600 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4601 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4602 "TARGET_SIMD"
4603 {
4604 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4605 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4606 operands[2], p));
4607 DONE;
4608 })
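
;; An illustrative sketch (hypothetical function): the vectorizer uses the
;; vec_widen_<su>add/sub lo/hi pairs for loops that widen before the
;; arithmetic, e.g. uaddl/uaddl2 for:
;;
;;   void wadd (unsigned short *r, const unsigned char *a,
;;              const unsigned char *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       r[i] = (unsigned short) a[i] + b[i];
;;   }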
4609
4610 (define_expand "aarch64_saddl2<mode>"
4611 [(match_operand:<VWIDE> 0 "register_operand")
4612 (match_operand:VQW 1 "register_operand")
4613 (match_operand:VQW 2 "register_operand")]
4614 "TARGET_SIMD"
4615 {
4616 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4617 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4618 operands[2], p));
4619 DONE;
4620 })
4621
4622 (define_expand "aarch64_uaddl2<mode>"
4623 [(match_operand:<VWIDE> 0 "register_operand")
4624 (match_operand:VQW 1 "register_operand")
4625 (match_operand:VQW 2 "register_operand")]
4626 "TARGET_SIMD"
4627 {
4628 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4629 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4630 operands[2], p));
4631 DONE;
4632 })
4633
4634 (define_expand "aarch64_ssubl2<mode>"
4635 [(match_operand:<VWIDE> 0 "register_operand")
4636 (match_operand:VQW 1 "register_operand")
4637 (match_operand:VQW 2 "register_operand")]
4638 "TARGET_SIMD"
4639 {
4640 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4641 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4642 operands[2], p));
4643 DONE;
4644 })
4645
4646 (define_expand "aarch64_usubl2<mode>"
4647 [(match_operand:<VWIDE> 0 "register_operand")
4648 (match_operand:VQW 1 "register_operand")
4649 (match_operand:VQW 2 "register_operand")]
4650 "TARGET_SIMD"
4651 {
4652 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4653 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4654 operands[2], p));
4655 DONE;
4656 })
4657
4658 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4659 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4660 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4661 (match_operand:VD_BHSI 1 "register_operand" "w"))
4662 (ANY_EXTEND:<VWIDE>
4663 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4664 "TARGET_SIMD"
4665 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4666 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4667 )
4668
4669 ;; <su><addsub>w<q>.
4670
4671 (define_expand "widen_ssum<mode>3"
4672 [(set (match_operand:<VDBLW> 0 "register_operand")
4673 (plus:<VDBLW> (sign_extend:<VDBLW>
4674 (match_operand:VQW 1 "register_operand"))
4675 (match_operand:<VDBLW> 2 "register_operand")))]
4676 "TARGET_SIMD"
4677 {
4678 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4679 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4680
4681 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4682 operands[1], p));
4683 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4684 DONE;
4685 }
4686 )
4687
4688 (define_expand "widen_ssum<mode>3"
4689 [(set (match_operand:<VWIDE> 0 "register_operand")
4690 (plus:<VWIDE> (sign_extend:<VWIDE>
4691 (match_operand:VD_BHSI 1 "register_operand"))
4692 (match_operand:<VWIDE> 2 "register_operand")))]
4693 "TARGET_SIMD"
4694 {
4695 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4696 DONE;
4697 })
4698
4699 (define_expand "widen_usum<mode>3"
4700 [(set (match_operand:<VDBLW> 0 "register_operand")
4701 (plus:<VDBLW> (zero_extend:<VDBLW>
4702 (match_operand:VQW 1 "register_operand"))
4703 (match_operand:<VDBLW> 2 "register_operand")))]
4704 "TARGET_SIMD"
4705 {
4706 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4707 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4708
4709 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4710 operands[1], p));
4711 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4712 DONE;
4713 }
4714 )
4715
4716 (define_expand "widen_usum<mode>3"
4717 [(set (match_operand:<VWIDE> 0 "register_operand")
4718 (plus:<VWIDE> (zero_extend:<VWIDE>
4719 (match_operand:VD_BHSI 1 "register_operand"))
4720 (match_operand:<VWIDE> 2 "register_operand")))]
4721 "TARGET_SIMD"
4722 {
4723 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4724 DONE;
4725 })
4726
4727 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4728 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4729 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4730 (ANY_EXTEND:<VWIDE>
4731 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4732 "TARGET_SIMD"
4733 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4734 [(set_attr "type" "neon_sub_widen")]
4735 )
4736
4737 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4738 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4739 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4740 (ANY_EXTEND:<VWIDE>
4741 (vec_select:<VHALF>
4742 (match_operand:VQW 2 "register_operand" "w")
4743 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4744 "TARGET_SIMD"
4745 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4746 [(set_attr "type" "neon_sub_widen")]
4747 )
4748
4749 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4750 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4751 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4752 (ANY_EXTEND:<VWIDE>
4753 (vec_select:<VHALF>
4754 (match_operand:VQW 2 "register_operand" "w")
4755 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4756 "TARGET_SIMD"
4757 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4758 [(set_attr "type" "neon_sub_widen")]
4759 )
4760
4761 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4762 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4763 (plus:<VWIDE>
4764 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4765 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4766 "TARGET_SIMD"
4767 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4768 [(set_attr "type" "neon_add_widen")]
4769 )
4770
4771 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4772 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4773 (plus:<VWIDE>
4774 (ANY_EXTEND:<VWIDE>
4775 (vec_select:<VHALF>
4776 (match_operand:VQW 2 "register_operand" "w")
4777 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4778 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4779 "TARGET_SIMD"
4780 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4781 [(set_attr "type" "neon_add_widen")]
4782 )
4783
4784 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4785 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4786 (plus:<VWIDE>
4787 (ANY_EXTEND:<VWIDE>
4788 (vec_select:<VHALF>
4789 (match_operand:VQW 2 "register_operand" "w")
4790 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4791 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4792 "TARGET_SIMD"
4793 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4794 [(set_attr "type" "neon_add_widen")]
4795 )
4796
4797 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4798 [(set (match_operand:<VWIDE> 0 "register_operand")
4799 (ADDSUB:<VWIDE>
4800 (ANY_EXTEND:<VWIDE>
4801 (vec_select:<VHALF>
4802 (match_operand:VQW 2 "register_operand")
4803 (match_dup 3)))
4804 (match_operand:<VWIDE> 1 "register_operand")))]
4805 "TARGET_SIMD"
4806 {
4807 /* We still do an emit_insn rather than relying on the pattern above
4808 because for the MINUS case the operands would need to be swapped
4809 around. */
4810 operands[3]
4811 = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4812 emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4813 operands[0],
4814 operands[1],
4815 operands[2],
4816 operands[3]));
4817 DONE;
4818 })
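
;; The PARALLEL built here is the same high-half lane selector used by the
;; *_internal patterns above; e.g. for V8HI on little-endian it is roughly
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;; so the vec_select picks lanes 4..7 for the "2" (high-half) instructions.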
4819
4820 ;; <su><r>h<addsub>.
4821
4822 (define_expand "<su_optab>avg<mode>3_floor"
4823 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4824 (truncate:VDQ_BHSI
4825 (ashiftrt:<V2XWIDE>
4826 (plus:<V2XWIDE>
4827 (ANY_EXTEND:<V2XWIDE>
4828 (match_operand:VDQ_BHSI 1 "register_operand"))
4829 (ANY_EXTEND:<V2XWIDE>
4830 (match_operand:VDQ_BHSI 2 "register_operand")))
4831 (match_dup 3))))]
4832 "TARGET_SIMD"
4833 {
4834 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4835 }
4836 )
4837
4838 (define_expand "<su_optab>avg<mode>3_ceil"
4839 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4840 (truncate:VDQ_BHSI
4841 (ashiftrt:<V2XWIDE>
4842 (plus:<V2XWIDE>
4843 (plus:<V2XWIDE>
4844 (ANY_EXTEND:<V2XWIDE>
4845 (match_operand:VDQ_BHSI 1 "register_operand"))
4846 (ANY_EXTEND:<V2XWIDE>
4847 (match_operand:VDQ_BHSI 2 "register_operand")))
4848 (match_dup 3))
4849 (match_dup 3))))]
4850 "TARGET_SIMD"
4851 {
4852 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4853 }
4854 )
4855
4856 (define_expand "aarch64_<su>hsub<mode>"
4857 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4858 (truncate:VDQ_BHSI
4859 (ashiftrt:<V2XWIDE>
4860 (minus:<V2XWIDE>
4861 (ANY_EXTEND:<V2XWIDE>
4862 (match_operand:VDQ_BHSI 1 "register_operand"))
4863 (ANY_EXTEND:<V2XWIDE>
4864 (match_operand:VDQ_BHSI 2 "register_operand")))
4865 (match_dup 3))))]
4866 "TARGET_SIMD"
4867 {
4868 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4869 }
4870 )
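
;; Written out element by element, these expansions are, roughly:
;;   avg_floor:  res[i] = (a[i] + b[i]) >> 1;        // <su>hadd
;;   avg_ceil:   res[i] = (a[i] + b[i] + 1) >> 1;    // <su>rhadd
;;   hsub:       res[i] = (a[i] - b[i]) >> 1;        // <su>hsub
;; with the arithmetic done in double-width elements so that no
;; intermediate result can wrap.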
4871
4872 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4873 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4874 (truncate:VDQ_BHSI
4875 (ashiftrt:<V2XWIDE>
4876 (ADDSUB:<V2XWIDE>
4877 (ANY_EXTEND:<V2XWIDE>
4878 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4879 (ANY_EXTEND:<V2XWIDE>
4880 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4881 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4882 "TARGET_SIMD"
4883 "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4884 [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4885 )
4886
4887 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4888 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4889 (truncate:VDQ_BHSI
4890 (ashiftrt:<V2XWIDE>
4891 (plus:<V2XWIDE>
4892 (plus:<V2XWIDE>
4893 (ANY_EXTEND:<V2XWIDE>
4894 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4895 (ANY_EXTEND:<V2XWIDE>
4896 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4897 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4898 (match_dup 3))))]
4899 "TARGET_SIMD"
4900 "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4901 [(set_attr "type" "neon_add_halve<q>")]
4902 )
4903
4904 ;; <r><addsub>hn<q>.
4905
4906 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4907 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4908 (truncate:<VNARROWQ>
4909 (ashiftrt:VQN
4910 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4911 (match_operand:VQN 2 "register_operand" "w"))
4912 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4913 "TARGET_SIMD"
4914 "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4915 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4916 )
4917
4918 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4919 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4920 (truncate:<VNARROWQ>
4921 (ashiftrt:VQN
4922 (plus:VQN
4923 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4924 (match_operand:VQN 2 "register_operand" "w"))
4925 (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4926 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4927 "TARGET_SIMD"
4928 "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4929 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4930 )
4931
4932 (define_expand "aarch64_<optab>hn<mode>"
4933 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4934 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4935 (match_operand:VQN 2 "register_operand")))]
4936 "TARGET_SIMD"
4937 {
4938 rtx shft
4939 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4940 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4941 emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4942 operands[2], shft));
4943 DONE;
4944 }
4945 )
4946
4947 (define_expand "aarch64_r<optab>hn<mode>"
4948 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4949 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4950 (match_operand:VQN 2 "register_operand")))]
4951 "TARGET_SIMD"
4952 {
4953 rtx shft
4954 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4955 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4956 rtx rnd
4957 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4958 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4959 emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4960 operands[2], rnd, shft));
4961 DONE;
4962 }
4963 )
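
;; For example, for V8HI (16-bit elements) the constants built above are
;; shft = 8 and rnd = 1 << 7, so, element by element and roughly:
;;   addhn:  res[i] = (uint8_t) ((a[i] + b[i]) >> 8);
;;   raddhn: res[i] = (uint8_t) ((a[i] + b[i] + (1 << 7)) >> 8);
;; i.e. the (rounded) high half of each sum, narrowed.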
4964
4965 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4966 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4967 (vec_concat:<VNARROWQ2>
4968 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4969 (truncate:<VNARROWQ>
4970 (ashiftrt:VQN
4971 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4972 (match_operand:VQN 3 "register_operand" "w"))
4973 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4974 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4975 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4976 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4977 )
4978
4979 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4980 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4981 (vec_concat:<VNARROWQ2>
4982 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4983 (truncate:<VNARROWQ>
4984 (ashiftrt:VQN
4985 (plus:VQN
4986 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4987 (match_operand:VQN 3 "register_operand" "w"))
4988 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4989 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4990 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4991 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4992 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4993 )
4994
4995 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4996 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4997 (vec_concat:<VNARROWQ2>
4998 (truncate:<VNARROWQ>
4999 (ashiftrt:VQN
5000 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
5001 (match_operand:VQN 3 "register_operand" "w"))
5002 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
5003 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5004 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5005 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
5006 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
5007 )
5008
5009 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
5010 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5011 (vec_concat:<VNARROWQ2>
5012 (truncate:<VNARROWQ>
5013 (ashiftrt:VQN
5014 (plus:VQN
5015 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
5016 (match_operand:VQN 3 "register_operand" "w"))
5017 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
5018 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
5019 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5020 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5021 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
5022 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
5023 )
5024
5025 (define_expand "aarch64_<optab>hn2<mode>"
5026 [(match_operand:<VNARROWQ2> 0 "register_operand")
5027 (match_operand:<VNARROWQ> 1 "register_operand")
5028 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5029 (match_operand:VQN 3 "register_operand"))]
5030 "TARGET_SIMD"
5031 {
5032 rtx shft
5033 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5034 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5035 if (BYTES_BIG_ENDIAN)
5036 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
5037 operands[1], operands[2], operands[3], shft));
5038 else
5039 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
5040 operands[1], operands[2], operands[3], shft));
5041 DONE;
5042 }
5043 )
5044
5045 (define_expand "aarch64_r<optab>hn2<mode>"
5046 [(match_operand:<VNARROWQ2> 0 "register_operand")
5047 (match_operand:<VNARROWQ> 1 "register_operand")
5048 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5049 (match_operand:VQN 3 "register_operand"))]
5050 "TARGET_SIMD"
5051 {
5052 rtx shft
5053 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5054 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5055 rtx rnd
5056 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5057 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
5058 if (BYTES_BIG_ENDIAN)
5059 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
5060 operands[1], operands[2], operands[3], rnd, shft));
5061 else
5062 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5063 operands[1], operands[2], operands[3], rnd, shft));
5064 DONE;
5065 }
5066 )
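
;; The _le/_be pairs above differ only in the operand order of the
;; vec_concat: RTL element order follows memory order, so on big-endian
;; the two register halves appear swapped.  Both forms implement, roughly,
;;   res = { prev_narrow_half, narrow ((a + b [+ rnd]) >> shft) }
;; with operand 1 tied to the destination so the low half is preserved.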
5067
5068 ;; Optimize ((a + b) >> n) + c where n is half the bitsize of the vector element
5069 (define_insn_and_split "*bitmask_shift_plus<mode>"
5070 [(set (match_operand:VQN 0 "register_operand" "=&w")
5071 (plus:VQN
5072 (lshiftrt:VQN
5073 (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5074 (match_operand:VQN 2 "register_operand" "w"))
5075 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5076 (match_operand:VQN 4 "register_operand" "w")))]
5077 "TARGET_SIMD"
5078 "#"
5079 "&& true"
5080 [(const_int 0)]
5081 {
5082 rtx tmp;
5083 if (can_create_pseudo_p ())
5084 tmp = gen_reg_rtx (<VNARROWQ>mode);
5085 else
5086 tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5087 emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5088 emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
5089 DONE;
5090 })
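
;; The split above rewrites ((a + b) >> shft) + c, where the shift extracts
;; the high halves exactly, as roughly:
;;   t = addhn (a, b);        // narrow high halves of the wrapping sums
;;   r = uaddw (c, t);        // zero-extend and accumulate into c
;; one instruction shorter than add + shift + add; the zero extension
;; matches what the original lshiftrt guarantees for each element.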
5091
5092 ;; pmul.
5093
5094 (define_insn "aarch64_pmul<mode>"
5095 [(set (match_operand:VB 0 "register_operand" "=w")
5096 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5097 (match_operand:VB 2 "register_operand" "w")]
5098 UNSPEC_PMUL))]
5099 "TARGET_SIMD"
5100 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5101 [(set_attr "type" "neon_mul_<Vetype><q>")]
5102 )
5103
5104 (define_insn "aarch64_pmullv8qi"
5105 [(set (match_operand:V8HI 0 "register_operand" "=w")
5106 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5107 (match_operand:V8QI 2 "register_operand" "w")]
5108 UNSPEC_PMULL))]
5109 "TARGET_SIMD"
5110 "pmull\\t%0.8h, %1.8b, %2.8b"
5111 [(set_attr "type" "neon_mul_b_long")]
5112 )
5113
5114 (define_insn "aarch64_pmull_hiv16qi_insn"
5115 [(set (match_operand:V8HI 0 "register_operand" "=w")
5116 (unspec:V8HI
5117 [(vec_select:V8QI
5118 (match_operand:V16QI 1 "register_operand" "w")
5119 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5120 (vec_select:V8QI
5121 (match_operand:V16QI 2 "register_operand" "w")
5122 (match_dup 3))]
5123 UNSPEC_PMULL))]
5124 "TARGET_SIMD"
5125 "pmull2\\t%0.8h, %1.16b, %2.16b"
5126 [(set_attr "type" "neon_mul_b_long")]
5127 )
5128
5129 (define_expand "aarch64_pmull_hiv16qi"
5130 [(match_operand:V8HI 0 "register_operand")
5131 (match_operand:V16QI 1 "register_operand")
5132 (match_operand:V16QI 2 "register_operand")]
5133 "TARGET_SIMD"
5134 {
5135 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5136 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5137 operands[2], p));
5138 DONE;
5139 }
5140 )
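
;; PMUL/PMULL are carry-less (polynomial, GF(2)) multiplies: partial
;; products are combined with XOR instead of ADD.  E.g. for bytes, roughly:
;;   res = 0; for (i = 0; i < 8; i++) if ((b >> i) & 1) res ^= a << i;
;; PMUL keeps the low 8 bits of that, PMULL the full 16-bit product, and
;; PMULL2 does the same on the high halves of the 128-bit inputs.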
5141
5142 ;; fmulx.
5143
5144 (define_insn "aarch64_fmulx<mode>"
5145 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5146 (unspec:VHSDF_HSDF
5147 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5148 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5149 UNSPEC_FMULX))]
5150 "TARGET_SIMD"
5151 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5152 [(set_attr "type" "neon_fp_mul_<stype>")]
5153 )
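
;; FMULX behaves like FMUL except that (+/-)0.0 * (+/-)Inf returns
;; (+/-)2.0 (sign from the XOR of the operand signs) rather than NaN;
;; roughly, per element:
;;   res = (iszero (a) && isinf (b)) || (isinf (a) && iszero (b))
;;         ? (signbit (a) != signbit (b) ? -2.0 : 2.0)
;;         : a * b;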
5154
5155 ;; vmulxq_lane_f32 and vmulx_laneq_f32
5156
5157 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5158 [(set (match_operand:VDQSF 0 "register_operand" "=w")
5159 (unspec:VDQSF
5160 [(match_operand:VDQSF 1 "register_operand" "w")
5161 (vec_duplicate:VDQSF
5162 (vec_select:<VEL>
5163 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5164 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5165 UNSPEC_FMULX))]
5166 "TARGET_SIMD"
5167 {
5168 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5169 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5170 }
5171 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5172 )
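
;; aarch64_endian_lane_rtx flips the lane number on big-endian so that the
;; architectural lane printed in the asm matches the RTL lane selected
;; above; e.g. RTL lane 1 of a V4SF becomes architectural lane 2.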
5173
5174 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5175
5176 (define_insn "*aarch64_mulx_elt<mode>"
5177 [(set (match_operand:VDQF 0 "register_operand" "=w")
5178 (unspec:VDQF
5179 [(match_operand:VDQF 1 "register_operand" "w")
5180 (vec_duplicate:VDQF
5181 (vec_select:<VEL>
5182 (match_operand:VDQF 2 "register_operand" "w")
5183 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5184 UNSPEC_FMULX))]
5185 "TARGET_SIMD"
5186 {
5187 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5188 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5189 }
5190 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5191 )
5192
5193 ;; vmulxq_lane
5194
5195 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5196 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5197 (unspec:VHSDF
5198 [(match_operand:VHSDF 1 "register_operand" "w")
5199 (vec_duplicate:VHSDF
5200 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5201 UNSPEC_FMULX))]
5202 "TARGET_SIMD"
5203 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5204 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5205 )
5206
5207 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5208 ;; vmulxd_lane_f64 == vmulx_lane_f64
5209 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5210
5211 (define_insn "*aarch64_vgetfmulx<mode>"
5212 [(set (match_operand:<VEL> 0 "register_operand" "=w")
5213 (unspec:<VEL>
5214 [(match_operand:<VEL> 1 "register_operand" "w")
5215 (vec_select:<VEL>
5216 (match_operand:VDQF 2 "register_operand" "w")
5217 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5218 UNSPEC_FMULX))]
5219 "TARGET_SIMD"
5220 {
5221 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5222 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5223 }
5224 [(set_attr "type" "fmul<Vetype>")]
5225 )

5226 ;; <su>q<addsub>
5227
5228 (define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
5229 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5230 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5231 (match_operand:VSDQ_I 2 "register_operand" "w")))]
5232 "TARGET_SIMD"
5233 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5234 [(set_attr "type" "neon_q<addsub><q>")]
5235 )
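
;; These are the saturating forms: the result is clamped to the type's
;; range instead of wrapping.  E.g. for signed bytes, roughly:
;;   res[i] = clamp (a[i] + b[i], -128, 127);   // sqadd
;; and clamp (a[i] + b[i], 0, 255) for uqadd, with the analogous
;; clamping for sqsub/uqsub.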
5236
5237 ;; suqadd and usqadd
5238
5239 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5240 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5241 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5242 (match_operand:VSDQ_I 2 "register_operand" "w")]
5243 USSUQADD))]
5244 "TARGET_SIMD"
5245 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5246 [(set_attr "type" "neon_qadd<q>")]
5247 )
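
;; These mix signedness: suqadd adds an unsigned value into a signed
;; accumulator with signed saturation, usqadd a signed value into an
;; unsigned accumulator with unsigned saturation; roughly, for bytes:
;;   suqadd: acc[i] = clamp ((int) acc[i] + (unsigned) x[i], -128, 127);
;;   usqadd: acc[i] = clamp ((unsigned) acc[i] + (int) x[i], 0, 255);
;; hence the unspec and the tied "0" constraint on operand 1.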
5248
5249 ;; sqmovn and uqmovn
5250
5251 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5252 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5253 (SAT_TRUNC:<VNARROWQ>
5254 (match_operand:SD_HSDI 1 "register_operand" "w")))]
5255 "TARGET_SIMD"
5256 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5257 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5258 )
5259
5260 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5261 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5262 (SAT_TRUNC:<VNARROWQ>
5263 (match_operand:VQN 1 "register_operand" "w")))]
5264 "TARGET_SIMD"
5265 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5266 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5267 )
5268
5269 (define_insn "aarch64_<su>qxtn2<mode>_le"
5270 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5271 (vec_concat:<VNARROWQ2>
5272 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5273 (SAT_TRUNC:<VNARROWQ>
5274 (match_operand:VQN 2 "register_operand" "w"))))]
5275 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5276 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5277 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5278 )
5279
5280 (define_insn "aarch64_<su>qxtn2<mode>_be"
5281 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5282 (vec_concat:<VNARROWQ2>
5283 (SAT_TRUNC:<VNARROWQ>
5284 (match_operand:VQN 2 "register_operand" "w"))
5285 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5286 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5287 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5288 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5289 )
5290
5291 (define_expand "aarch64_<su>qxtn2<mode>"
5292 [(match_operand:<VNARROWQ2> 0 "register_operand")
5293 (match_operand:<VNARROWQ> 1 "register_operand")
5294 (SAT_TRUNC:<VNARROWQ>
5295 (match_operand:VQN 2 "register_operand"))]
5296 "TARGET_SIMD"
5297 {
5298 if (BYTES_BIG_ENDIAN)
5299 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5300 operands[2]));
5301 else
5302 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5303 operands[2]));
5304 DONE;
5305 }
5306 )
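
;; SQXTN/UQXTN narrow each element with saturation rather than plain
;; truncation; e.g. for int32 -> int16, roughly:
;;   res[i] = (int16_t) clamp (a[i], -32768, 32767);
;; The qxtn2 forms write into the high half of the destination while the
;; tied operand 1 preserves the low half.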
5307
5308 ;; sqmovun
5309
5310 (define_insn "aarch64_sqmovun<mode>"
5311 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5312 (truncate:<VNARROWQ>
5313 (smin:SD_HSDI
5314 (smax:SD_HSDI
5315 (match_operand:SD_HSDI 1 "register_operand" "w")
5316 (const_int 0))
5317 (const_int <half_mask>))))]
5318 "TARGET_SIMD"
5319 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5320 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5321 )
5322
5323 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5324 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5325 (truncate:<VNARROWQ>
5326 (smin:VQN
5327 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5328 (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5329 (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5330 "TARGET_SIMD"
5331 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5332 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5333 )
5334
5335 (define_expand "aarch64_sqmovun<mode>"
5336 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5337 (truncate:<VNARROWQ>
5338 (smin:VQN
5339 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5340 (match_dup 2))
5341 (match_dup 3))))]
5342 "TARGET_SIMD"
5343 {
5344 operands[2] = CONST0_RTX (<MODE>mode);
5345 operands[3]
5346 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5347 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5348 }
5349 )
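
;; SQXTUN narrows a *signed* input to the *unsigned* narrow range, which
;; is exactly the smin (smax (x, 0), mask) form matched above; e.g. for
;; int32 -> uint16, roughly:
;;   res[i] = (uint16_t) clamp (a[i], 0, 65535);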
5350
5351 (define_insn "aarch64_sqxtun2<mode>_le"
5352 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5353 (vec_concat:<VNARROWQ2>
5354 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5355 (truncate:<VNARROWQ>
5356 (smin:VQN
5357 (smax:VQN
5358 (match_operand:VQN 2 "register_operand" "w")
5359 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5360 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5361 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5362 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5363 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5364 )
5365
5366 (define_insn "aarch64_sqxtun2<mode>_be"
5367 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5368 (vec_concat:<VNARROWQ2>
5369 (truncate:<VNARROWQ>
5370 (smin:VQN
5371 (smax:VQN
5372 (match_operand:VQN 2 "register_operand" "w")
5373 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5374 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5375 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5376 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5377 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5378 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5379 )
5380
5381 (define_expand "aarch64_sqxtun2<mode>"
5382 [(match_operand:<VNARROWQ2> 0 "register_operand")
5383 (match_operand:<VNARROWQ> 1 "register_operand")
5384 (match_operand:VQN 2 "register_operand")]
5385 "TARGET_SIMD"
5386 {
5387 rtx zeros = CONST0_RTX (<MODE>mode);
5388 rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5389 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5390 if (BYTES_BIG_ENDIAN)
5391 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5392 operands[2], zeros, half_umax));
5393 else
5394 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5395 operands[2], zeros, half_umax));
5396 DONE;
5397 }
5398 )
5399
5400 ;; <su>q<absneg>
5401
5402 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5403 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5404 (UNQOPS:VSDQ_I
5405 (match_operand:VSDQ_I 1 "register_operand" "w")))]
5406 "TARGET_SIMD"
5407 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5408 [(set_attr "type" "neon_<optab><q>")]
5409 )
5410
5411 ;; sq<r>dmulh.
5412
5413 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5414 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5415 (unspec:VSDQ_HSI
5416 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5417 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5418 VQDMULH))]
5419 "TARGET_SIMD"
5420 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5421 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5422 )
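
;; The doubling-multiply-high operations are, element by element and with
;; esize the element width in bits, roughly:
;;   sqdmulh:  res[i] = sat ((2 * a[i] * b[i]) >> esize);
;;   sqrdmulh: res[i] = sat ((2 * a[i] * b[i] + (1 << (esize - 1))) >> esize);
;; only INT_MIN * INT_MIN can actually saturate.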
5423
5424 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5425 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5426 (unspec:VDQHS
5427 [(match_operand:VDQHS 1 "register_operand" "w")
5428 (vec_duplicate:VDQHS
5429 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5430 VQDMULH))]
5431 "TARGET_SIMD"
5432 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5433 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5434 )
5435
5436 ;; sq<r>dmulh_lane
5437
5438 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5439 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5440 (unspec:VDQHS
5441 [(match_operand:VDQHS 1 "register_operand" "w")
5442 (vec_select:<VEL>
5443 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5444 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5445 VQDMULH))]
5446 "TARGET_SIMD"
5447 "*
5448 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5449 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5450 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5451 )
5452
5453 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5454 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5455 (unspec:VDQHS
5456 [(match_operand:VDQHS 1 "register_operand" "w")
5457 (vec_select:<VEL>
5458 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5459 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5460 VQDMULH))]
5461 "TARGET_SIMD"
5462 "*
5463 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5464 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5465 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5466 )
5467
5468 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5469 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5470 (unspec:SD_HSI
5471 [(match_operand:SD_HSI 1 "register_operand" "w")
5472 (vec_select:<VEL>
5473 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5474 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5475 VQDMULH))]
5476 "TARGET_SIMD"
5477 "*
5478 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5479 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5480 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5481 )
5482
5483 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5484 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5485 (unspec:SD_HSI
5486 [(match_operand:SD_HSI 1 "register_operand" "w")
5487 (vec_select:<VEL>
5488 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5489 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5490 VQDMULH))]
5491 "TARGET_SIMD"
5492 "*
5493 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5494 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5495 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5496 )
5497
5498 ;; sqrdml[as]h.
5499
5500 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5501 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5502 (unspec:VSDQ_HSI
5503 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5504 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5505 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5506 SQRDMLH_AS))]
5507 "TARGET_SIMD_RDMA"
5508 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5509 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5510 )
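
;; The Armv8.1-A RDMA forms fuse the accumulation into the rounding
;; doubling multiply-high; informally, per element:
;;   sqrdmlah: acc[i] = sat (((acc[i] << esize) + 2 * a[i] * b[i]
;;                            + (1 << (esize - 1))) >> esize);
;; with sqrdmlsh subtracting the product instead.  The intermediate is
;; wide enough that only the final narrowing saturates.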
5511
5512 ;; sqrdml[as]h_lane.
5513
5514 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5515 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5516 (unspec:VDQHS
5517 [(match_operand:VDQHS 1 "register_operand" "0")
5518 (match_operand:VDQHS 2 "register_operand" "w")
5519 (vec_select:<VEL>
5520 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5521 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5522 SQRDMLH_AS))]
5523 "TARGET_SIMD_RDMA"
5524 {
5525 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5526 return
5527 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5528 }
5529 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5530 )
5531
5532 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5533 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5534 (unspec:SD_HSI
5535 [(match_operand:SD_HSI 1 "register_operand" "0")
5536 (match_operand:SD_HSI 2 "register_operand" "w")
5537 (vec_select:<VEL>
5538 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5539 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5540 SQRDMLH_AS))]
5541 "TARGET_SIMD_RDMA"
5542 {
5543 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5544 return
5545 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5546 }
5547 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5548 )
5549
5550 ;; sqrdml[as]h_laneq.
5551
5552 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5553 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5554 (unspec:VDQHS
5555 [(match_operand:VDQHS 1 "register_operand" "0")
5556 (match_operand:VDQHS 2 "register_operand" "w")
5557 (vec_select:<VEL>
5558 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5559 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5560 SQRDMLH_AS))]
5561 "TARGET_SIMD_RDMA"
5562 {
5563 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5564 return
5565 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5566 }
5567 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5568 )
5569
5570 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5571 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5572 (unspec:SD_HSI
5573 [(match_operand:SD_HSI 1 "register_operand" "0")
5574 (match_operand:SD_HSI 2 "register_operand" "w")
5575 (vec_select:<VEL>
5576 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5577 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5578 SQRDMLH_AS))]
5579 "TARGET_SIMD_RDMA"
5580 {
5581 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5582 return
5583 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5584 }
5585 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5586 )
5587
5588 ;; vqdml[sa]l
5589
5590 (define_insn "aarch64_sqdmlal<mode>"
5591 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5592 (ss_plus:<VWIDE>
5593 (ss_ashift:<VWIDE>
5594 (mult:<VWIDE>
5595 (sign_extend:<VWIDE>
5596 (match_operand:VSD_HSI 2 "register_operand" "w"))
5597 (sign_extend:<VWIDE>
5598 (match_operand:VSD_HSI 3 "register_operand" "w")))
5599 (const_int 1))
5600 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5601 "TARGET_SIMD"
5602 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5603 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5604 )
5605
5606 (define_insn "aarch64_sqdmlsl<mode>"
5607 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5608 (ss_minus:<VWIDE>
5609 (match_operand:<VWIDE> 1 "register_operand" "0")
5610 (ss_ashift:<VWIDE>
5611 (mult:<VWIDE>
5612 (sign_extend:<VWIDE>
5613 (match_operand:VSD_HSI 2 "register_operand" "w"))
5614 (sign_extend:<VWIDE>
5615 (match_operand:VSD_HSI 3 "register_operand" "w")))
5616 (const_int 1))))]
5617 "TARGET_SIMD"
5618 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5619 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5620 )
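
;; Note that both the doubling and the accumulation saturate, matching the
;; nested ss_ashift/ss_plus RTL; e.g. for 16-bit elements, roughly:
;;   sqdmlal: acc[i] = ssat32 (acc[i] + ssat32 (2 * (int32_t) a[i] * b[i]));
;; where ssat32 clamps to the int32_t range.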
5621
5622 ;; vqdml[sa]l_lane
5623
5624 (define_insn "aarch64_sqdmlal_lane<mode>"
5625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5626 (ss_plus:<VWIDE>
5627 (ss_ashift:<VWIDE>
5628 (mult:<VWIDE>
5629 (sign_extend:<VWIDE>
5630 (match_operand:VD_HSI 2 "register_operand" "w"))
5631 (vec_duplicate:<VWIDE>
5632 (sign_extend:<VWIDE_S>
5633 (vec_select:<VEL>
5634 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5635 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5636 ))
5637 (const_int 1))
5638 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5639 "TARGET_SIMD"
5640 {
5641 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5642 return
5643 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5644 }
5645 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5646 )
5647
5648 (define_insn "aarch64_sqdmlsl_lane<mode>"
5649 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5650 (ss_minus:<VWIDE>
5651 (match_operand:<VWIDE> 1 "register_operand" "0")
5652 (ss_ashift:<VWIDE>
5653 (mult:<VWIDE>
5654 (sign_extend:<VWIDE>
5655 (match_operand:VD_HSI 2 "register_operand" "w"))
5656 (vec_duplicate:<VWIDE>
5657 (sign_extend:<VWIDE_S>
5658 (vec_select:<VEL>
5659 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5660 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5661 ))
5662 (const_int 1))))]
5663 "TARGET_SIMD"
5664 {
5665 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5666 return
5667 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5668 }
5669 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5670 )
5671
5672
5673 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5674 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5675 (ss_minus:<VWIDE>
5676 (match_operand:<VWIDE> 1 "register_operand" "0")
5677 (ss_ashift:<VWIDE>
5678 (mult:<VWIDE>
5679 (sign_extend:<VWIDE>
5680 (match_operand:VD_HSI 2 "register_operand" "w"))
5681 (vec_duplicate:<VWIDE>
5682 (sign_extend:<VWIDE_S>
5683 (vec_select:<VEL>
5684 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5685 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5686 ))
5687 (const_int 1))))]
5688 "TARGET_SIMD"
5689 {
5690 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5691 return
5692 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5693 }
5694 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5695 )
5696
5697 (define_insn "aarch64_sqdmlal_laneq<mode>"
5698 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5699 (ss_plus:<VWIDE>
5700 (ss_ashift:<VWIDE>
5701 (mult:<VWIDE>
5702 (sign_extend:<VWIDE>
5703 (match_operand:VD_HSI 2 "register_operand" "w"))
5704 (vec_duplicate:<VWIDE>
5705 (sign_extend:<VWIDE_S>
5706 (vec_select:<VEL>
5707 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5708 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5709 ))
5710 (const_int 1))
5711 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5712 "TARGET_SIMD"
5713 {
5714 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5715 return
5716 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5717 }
5718 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5719 )
5720
5721
5722 (define_insn "aarch64_sqdmlal_lane<mode>"
5723 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5724 (ss_plus:<VWIDE>
5725 (ss_ashift:<VWIDE>
5726 (mult:<VWIDE>
5727 (sign_extend:<VWIDE>
5728 (match_operand:SD_HSI 2 "register_operand" "w"))
5729 (sign_extend:<VWIDE>
5730 (vec_select:<VEL>
5731 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5732 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5733 )
5734 (const_int 1))
5735 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5736 "TARGET_SIMD"
5737 {
5738 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5739 return
5740 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5741 }
5742 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5743 )
5744
5745 (define_insn "aarch64_sqdmlsl_lane<mode>"
5746 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5747 (ss_minus:<VWIDE>
5748 (match_operand:<VWIDE> 1 "register_operand" "0")
5749 (ss_ashift:<VWIDE>
5750 (mult:<VWIDE>
5751 (sign_extend:<VWIDE>
5752 (match_operand:SD_HSI 2 "register_operand" "w"))
5753 (sign_extend:<VWIDE>
5754 (vec_select:<VEL>
5755 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5756 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5757 )
5758 (const_int 1))))]
5759 "TARGET_SIMD"
5760 {
5761 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5762 return
5763 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5764 }
5765 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5766 )
5767
5768
5769 (define_insn "aarch64_sqdmlal_laneq<mode>"
5770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5771 (ss_plus:<VWIDE>
5772 (ss_ashift:<VWIDE>
5773 (mult:<VWIDE>
5774 (sign_extend:<VWIDE>
5775 (match_operand:SD_HSI 2 "register_operand" "w"))
5776 (sign_extend:<VWIDE>
5777 (vec_select:<VEL>
5778 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5779 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5780 )
5781 (const_int 1))
5782 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5783 "TARGET_SIMD"
5784 {
5785 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5786 return
5787 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5788 }
5789 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5790 )
5791
5792 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5793 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5794 (ss_minus:<VWIDE>
5795 (match_operand:<VWIDE> 1 "register_operand" "0")
5796 (ss_ashift:<VWIDE>
5797 (mult:<VWIDE>
5798 (sign_extend:<VWIDE>
5799 (match_operand:SD_HSI 2 "register_operand" "w"))
5800 (sign_extend:<VWIDE>
5801 (vec_select:<VEL>
5802 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5803 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5804 )
5805 (const_int 1))))]
5806 "TARGET_SIMD"
5807 {
5808 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5809 return
5810 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5811 }
5812 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5813 )
5814
5815 ;; vqdml[sa]l_n
5816
5817 (define_insn "aarch64_sqdmlsl_n<mode>"
5818 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5819 (ss_minus:<VWIDE>
5820 (match_operand:<VWIDE> 1 "register_operand" "0")
5821 (ss_ashift:<VWIDE>
5822 (mult:<VWIDE>
5823 (sign_extend:<VWIDE>
5824 (match_operand:VD_HSI 2 "register_operand" "w"))
5825 (vec_duplicate:<VWIDE>
5826 (sign_extend:<VWIDE_S>
5827 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5828 (const_int 1))))]
5829 "TARGET_SIMD"
5830 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5831 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5832 )
5833
5834 (define_insn "aarch64_sqdmlal_n<mode>"
5835 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5836 (ss_plus:<VWIDE>
5837 (ss_ashift:<VWIDE>
5838 (mult:<VWIDE>
5839 (sign_extend:<VWIDE>
5840 (match_operand:VD_HSI 2 "register_operand" "w"))
5841 (vec_duplicate:<VWIDE>
5842 (sign_extend:<VWIDE_S>
5843 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5844 (const_int 1))
5845 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5846 "TARGET_SIMD"
5847 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5848 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5849 )
5850
5851
5852 ;; sqdml[as]l2
5853
5854 (define_insn "aarch64_sqdmlal2<mode>_internal"
5855 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5856 (ss_plus:<VWIDE>
5857 (ss_ashift:<VWIDE>
5858 (mult:<VWIDE>
5859 (sign_extend:<VWIDE>
5860 (vec_select:<VHALF>
5861 (match_operand:VQ_HSI 2 "register_operand" "w")
5862 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5863 (sign_extend:<VWIDE>
5864 (vec_select:<VHALF>
5865 (match_operand:VQ_HSI 3 "register_operand" "w")
5866 (match_dup 4))))
5867 (const_int 1))
5868 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5869 "TARGET_SIMD"
5870 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5871 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5872 )
5873
5874 (define_insn "aarch64_sqdmlsl2<mode>_internal"
5875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5876 (ss_minus:<VWIDE>
5877 (match_operand:<VWIDE> 1 "register_operand" "0")
5878 (ss_ashift:<VWIDE>
5879 (mult:<VWIDE>
5880 (sign_extend:<VWIDE>
5881 (vec_select:<VHALF>
5882 (match_operand:VQ_HSI 2 "register_operand" "w")
5883 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5884 (sign_extend:<VWIDE>
5885 (vec_select:<VHALF>
5886 (match_operand:VQ_HSI 3 "register_operand" "w")
5887 (match_dup 4))))
5888 (const_int 1))))]
5889 "TARGET_SIMD"
5890 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5891 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5892 )
5893
5894 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
5895 [(match_operand:<VWIDE> 0 "register_operand")
5896 (SBINQOPS:<VWIDE>
5897 (match_operand:<VWIDE> 1 "register_operand")
5898 (match_dup 1))
5899 (match_operand:VQ_HSI 2 "register_operand")
5900 (match_operand:VQ_HSI 3 "register_operand")]
5901 "TARGET_SIMD"
5902 {
5903 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5904 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
5905 operands[1], operands[2],
5906 operands[3], p));
5907 DONE;
5908 })
5909
5910 ;; vqdml[sa]l2_lane
5911
5912 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
5913 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5914 (ss_minus:<VWIDE>
5915 (match_operand:<VWIDE> 1 "register_operand" "0")
5916 (ss_ashift:<VWIDE>
5917 (mult:<VWIDE>
5918 (sign_extend:<VWIDE>
5919 (vec_select:<VHALF>
5920 (match_operand:VQ_HSI 2 "register_operand" "w")
5921 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5922 (vec_duplicate:<VWIDE>
5923 (sign_extend:<VWIDE_S>
5924 (vec_select:<VEL>
5925 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5926 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5927 ))))
5928 (const_int 1))))]
5929 "TARGET_SIMD"
5930 {
5931 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5932 return
5933 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5934 }
5935 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5936 )
5937
5938 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5940 (ss_plus:<VWIDE>
5941 (ss_ashift:<VWIDE>
5942 (mult:<VWIDE>
5943 (sign_extend:<VWIDE>
5944 (vec_select:<VHALF>
5945 (match_operand:VQ_HSI 2 "register_operand" "w")
5946 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5947 (vec_duplicate:<VWIDE>
5948 (sign_extend:<VWIDE_S>
5949 (vec_select:<VEL>
5950 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5951 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5952 ))))
5953 (const_int 1))
5954 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5955 "TARGET_SIMD"
5956 {
5957 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5958 return
5959 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5960 }
5961 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5962 )
5963
5964 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
5965 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5966 (ss_minus:<VWIDE>
5967 (match_operand:<VWIDE> 1 "register_operand" "0")
5968 (ss_ashift:<VWIDE>
5969 (mult:<VWIDE>
5970 (sign_extend:<VWIDE>
5971 (vec_select:<VHALF>
5972 (match_operand:VQ_HSI 2 "register_operand" "w")
5973 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5974 (vec_duplicate:<VWIDE>
5975 (sign_extend:<VWIDE_S>
5976 (vec_select:<VEL>
5977 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5978 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5979 ))))
5980 (const_int 1))))]
5981 "TARGET_SIMD"
5982 {
5983 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5984 return
5985 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5986 }
5987 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5988 )
5989
5990 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5991 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5992 (ss_plus:<VWIDE>
5993 (ss_ashift:<VWIDE>
5994 (mult:<VWIDE>
5995 (sign_extend:<VWIDE>
5996 (vec_select:<VHALF>
5997 (match_operand:VQ_HSI 2 "register_operand" "w")
5998 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5999 (vec_duplicate:<VWIDE>
6000 (sign_extend:<VWIDE_S>
6001 (vec_select:<VEL>
6002 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6003 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6004 ))))
6005 (const_int 1))
6006 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6007 "TARGET_SIMD"
6008 {
6009 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6010 return
6011 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6012 }
6013 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6014 )
6015
6016 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
6017 [(match_operand:<VWIDE> 0 "register_operand")
6018 (SBINQOPS:<VWIDE>
6019 (match_operand:<VWIDE> 1 "register_operand")
6020 (match_dup 1))
6021 (match_operand:VQ_HSI 2 "register_operand")
6022 (match_operand:<VCOND> 3 "register_operand")
6023 (match_operand:SI 4 "immediate_operand")]
6024 "TARGET_SIMD"
6025 {
6026 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6027 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
6028 operands[1], operands[2],
6029 operands[3], operands[4], p));
6030 DONE;
6031 })
6032
6033 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
6034 [(match_operand:<VWIDE> 0 "register_operand")
6035 (SBINQOPS:<VWIDE>
6036 (match_operand:<VWIDE> 1 "register_operand")
6037 (match_dup 1))
6038 (match_operand:VQ_HSI 2 "register_operand")
6039 (match_operand:<VCONQ> 3 "register_operand")
6040 (match_operand:SI 4 "immediate_operand")]
6041 "TARGET_SIMD"
6042 {
6043 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6044 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
6045 operands[1], operands[2],
6046 operands[3], operands[4], p));
6047 DONE;
6048 })
6049
6050 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
6051 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6052 (ss_minus:<VWIDE>
6053 (match_operand:<VWIDE> 1 "register_operand" "0")
6054 (ss_ashift:<VWIDE>
6055 (mult:<VWIDE>
6056 (sign_extend:<VWIDE>
6057 (vec_select:<VHALF>
6058 (match_operand:VQ_HSI 2 "register_operand" "w")
6059 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6060 (vec_duplicate:<VWIDE>
6061 (sign_extend:<VWIDE_S>
6062 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6063 (const_int 1))))]
6064 "TARGET_SIMD"
6065 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6066 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6067 )
6068
6069 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
6070 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6071 (ss_plus:<VWIDE>
6072 (ss_ashift:<VWIDE>
6073 (mult:<VWIDE>
6074 (sign_extend:<VWIDE>
6075 (vec_select:<VHALF>
6076 (match_operand:VQ_HSI 2 "register_operand" "w")
6077 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6078 (vec_duplicate:<VWIDE>
6079 (sign_extend:<VWIDE_S>
6080 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6081 (const_int 1))
6082 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6083 "TARGET_SIMD"
6084 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6085 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6086 )
6087
6088 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
6089 [(match_operand:<VWIDE> 0 "register_operand")
6090 (SBINQOPS:<VWIDE>
6091 (match_operand:<VWIDE> 1 "register_operand")
6092 (match_dup 1))
6093 (match_operand:VQ_HSI 2 "register_operand")
6094 (match_operand:<VEL> 3 "register_operand")]
6095 "TARGET_SIMD"
6096 {
6097 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6098 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
6099 operands[1], operands[2],
6100 operands[3], p));
6101 DONE;
6102 })
6103
6104 ;; vqdmull
6105
6106 (define_insn "aarch64_sqdmull<mode>"
6107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6108 (ss_ashift:<VWIDE>
6109 (mult:<VWIDE>
6110 (sign_extend:<VWIDE>
6111 (match_operand:VSD_HSI 1 "register_operand" "w"))
6112 (sign_extend:<VWIDE>
6113 (match_operand:VSD_HSI 2 "register_operand" "w")))
6114 (const_int 1)))]
6115 "TARGET_SIMD"
6116 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6117 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
6118 )
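
;; SQDMULL is the multiply-only form of the above; e.g. for 16-bit
;; elements, roughly:
;;   res[i] = ssat32 (2 * (int32_t) a[i] * b[i]);
;; only -32768 * -32768 overflows the widened range, and the result then
;; clamps to 0x7fffffff.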
6119
6120 ;; vqdmull_lane
6121
6122 (define_insn "aarch64_sqdmull_lane<mode>"
6123 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6124 (ss_ashift:<VWIDE>
6125 (mult:<VWIDE>
6126 (sign_extend:<VWIDE>
6127 (match_operand:VD_HSI 1 "register_operand" "w"))
6128 (vec_duplicate:<VWIDE>
6129 (sign_extend:<VWIDE_S>
6130 (vec_select:<VEL>
6131 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6132 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6133 ))
6134 (const_int 1)))]
6135 "TARGET_SIMD"
6136 {
6137 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6138 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6139 }
6140 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6141 )
6142
6143 (define_insn "aarch64_sqdmull_laneq<mode>"
6144 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6145 (ss_ashift:<VWIDE>
6146 (mult:<VWIDE>
6147 (sign_extend:<VWIDE>
6148 (match_operand:VD_HSI 1 "register_operand" "w"))
6149 (vec_duplicate:<VWIDE>
6150 (sign_extend:<VWIDE_S>
6151 (vec_select:<VEL>
6152 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6153 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6154 ))
6155 (const_int 1)))]
6156 "TARGET_SIMD"
6157 {
6158 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6159 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6160 }
6161 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6162 )
6163
6164 (define_insn "aarch64_sqdmull_lane<mode>"
6165 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6166 (ss_ashift:<VWIDE>
6167 (mult:<VWIDE>
6168 (sign_extend:<VWIDE>
6169 (match_operand:SD_HSI 1 "register_operand" "w"))
6170 (sign_extend:<VWIDE>
6171 (vec_select:<VEL>
6172 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6173 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6174 ))
6175 (const_int 1)))]
6176 "TARGET_SIMD"
6177 {
6178 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6179 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6180 }
6181 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6182 )
6183
6184 (define_insn "aarch64_sqdmull_laneq<mode>"
6185 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6186 (ss_ashift:<VWIDE>
6187 (mult:<VWIDE>
6188 (sign_extend:<VWIDE>
6189 (match_operand:SD_HSI 1 "register_operand" "w"))
6190 (sign_extend:<VWIDE>
6191 (vec_select:<VEL>
6192 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6193 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6194 ))
6195 (const_int 1)))]
6196 "TARGET_SIMD"
6197 {
6198 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6199 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6200 }
6201 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6202 )
6203
6204 ;; vqdmull_n
6205
6206 (define_insn "aarch64_sqdmull_n<mode>"
6207 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6208 (ss_ashift:<VWIDE>
6209 (mult:<VWIDE>
6210 (sign_extend:<VWIDE>
6211 (match_operand:VD_HSI 1 "register_operand" "w"))
6212 (vec_duplicate:<VWIDE>
6213 (sign_extend:<VWIDE_S>
6214 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6215 )
6216 (const_int 1)))]
6217 "TARGET_SIMD"
6218 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6219 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6220 )
6221
6222 ;; vqdmull2
6223
6224 (define_insn "aarch64_sqdmull2<mode>_internal"
6225 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6226 (ss_ashift:<VWIDE>
6227 (mult:<VWIDE>
6228 (sign_extend:<VWIDE>
6229 (vec_select:<VHALF>
6230 (match_operand:VQ_HSI 1 "register_operand" "w")
6231 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6232 (sign_extend:<VWIDE>
6233 (vec_select:<VHALF>
6234 (match_operand:VQ_HSI 2 "register_operand" "w")
6235 (match_dup 3)))
6236 )
6237 (const_int 1)))]
6238 "TARGET_SIMD"
6239 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6240 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6241 )
6242
6243 (define_expand "aarch64_sqdmull2<mode>"
6244 [(match_operand:<VWIDE> 0 "register_operand")
6245 (match_operand:VQ_HSI 1 "register_operand")
6246 (match_operand:VQ_HSI 2 "register_operand")]
6247 "TARGET_SIMD"
6248 {
6249 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6250 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6251 operands[2], p));
6252 DONE;
6253 })
6254
6255 ;; vqdmull2_lane
6256
6257 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6258 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6259 (ss_ashift:<VWIDE>
6260 (mult:<VWIDE>
6261 (sign_extend:<VWIDE>
6262 (vec_select:<VHALF>
6263 (match_operand:VQ_HSI 1 "register_operand" "w")
6264 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6265 (vec_duplicate:<VWIDE>
6266 (sign_extend:<VWIDE_S>
6267 (vec_select:<VEL>
6268 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6269 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6270 ))
6271 (const_int 1)))]
6272 "TARGET_SIMD"
6273 {
6274 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6275 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6276 }
6277 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6278 )
6279
6280 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6281 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6282 (ss_ashift:<VWIDE>
6283 (mult:<VWIDE>
6284 (sign_extend:<VWIDE>
6285 (vec_select:<VHALF>
6286 (match_operand:VQ_HSI 1 "register_operand" "w")
6287 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6288 (vec_duplicate:<VWIDE>
6289 (sign_extend:<VWIDE_S>
6290 (vec_select:<VEL>
6291 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6292 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6293 ))
6294 (const_int 1)))]
6295 "TARGET_SIMD"
6296 {
6297 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6298 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6299 }
6300 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6301 )
6302
6303 (define_expand "aarch64_sqdmull2_lane<mode>"
6304 [(match_operand:<VWIDE> 0 "register_operand")
6305 (match_operand:VQ_HSI 1 "register_operand")
6306 (match_operand:<VCOND> 2 "register_operand")
6307 (match_operand:SI 3 "immediate_operand")]
6308 "TARGET_SIMD"
6309 {
6310 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6311 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6312 operands[2], operands[3],
6313 p));
6314 DONE;
6315 })
6316
6317 (define_expand "aarch64_sqdmull2_laneq<mode>"
6318 [(match_operand:<VWIDE> 0 "register_operand")
6319 (match_operand:VQ_HSI 1 "register_operand")
6320 (match_operand:<VCONQ> 2 "register_operand")
6321 (match_operand:SI 3 "immediate_operand")]
6322 "TARGET_SIMD"
6323 {
6324 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6325 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6326 operands[2], operands[3],
6327 p));
6328 DONE;
6329 })
6330
6331 ;; vqdmull2_n
6332
6333 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6334 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6335 (ss_ashift:<VWIDE>
6336 (mult:<VWIDE>
6337 (sign_extend:<VWIDE>
6338 (vec_select:<VHALF>
6339 (match_operand:VQ_HSI 1 "register_operand" "w")
6340 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6341 (vec_duplicate:<VWIDE>
6342 (sign_extend:<VWIDE_S>
6343 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6344 )
6345 (const_int 1)))]
6346 "TARGET_SIMD"
6347 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6348 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6349 )
6350
6351 (define_expand "aarch64_sqdmull2_n<mode>"
6352 [(match_operand:<VWIDE> 0 "register_operand")
6353 (match_operand:VQ_HSI 1 "register_operand")
6354 (match_operand:<VEL> 2 "register_operand")]
6355 "TARGET_SIMD"
6356 {
6357 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6358 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6359 operands[2], p));
6360 DONE;
6361 })
6362
6363 ;; vshl
6364
6365 (define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
6366 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6367 (unspec:VSDQ_I_DI
6368 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6369 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6370 VSHL))]
6371 "TARGET_SIMD"
6372 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6373 [(set_attr "type" "neon_shift_reg<q>")]
6374 )
6375
6376
6377 ;; vqshl
6378
6379 (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
6380 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6381 (unspec:VSDQ_I
6382 [(match_operand:VSDQ_I 1 "register_operand" "w")
6383 (match_operand:VSDQ_I 2 "register_operand" "w")]
6384 VQSHL))]
6385 "TARGET_SIMD"
6386 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6387 [(set_attr "type" "neon_sat_shift_reg<q>")]
6388 )
6389
6390 ;; vshll_n
6391
6392 (define_insn "aarch64_<su>shll<mode>"
6393 [(set (match_operand:<VWIDE> 0 "register_operand")
6394 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6395 (match_operand:VD_BHSI 1 "register_operand"))
6396 (match_operand:<VWIDE> 2
6397 "aarch64_simd_shll_imm_vec")))]
6398 "TARGET_SIMD"
6399 {@ [cons: =0, 1, 2]
6400 [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6401 [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6402 }
6403 [(set_attr "type" "neon_shift_imm_long")]
6404 )
6405
6406 (define_expand "aarch64_<sur>shll_n<mode>"
6407 [(set (match_operand:<VWIDE> 0 "register_operand")
6408 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
6409 (match_operand:SI 2
6410 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6411 VSHLL))]
6412 "TARGET_SIMD"
6413 {
6414 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6415 emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
6416 DONE;
6417 }
6418 )
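
;; A rough example of how the alternatives above are selected: a shift
;; amount equal to the element width (e.g. vshll_n_u8 (x, 8)) satisfies
;; the D2 constraint and is emitted as SHLL, while any smaller immediate
;; satisfies DL and is emitted as USHLL/SSHLL.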
6419
6420 ;; vshll_high_n
6421
6422 (define_insn "aarch64_<su>shll2<mode>"
6423 [(set (match_operand:<VWIDE> 0 "register_operand")
6424 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6425 (vec_select:<VHALF>
6426 (match_operand:VQW 1 "register_operand")
6427 (match_operand:VQW 2 "vect_par_cnst_hi_half")))
6428 (match_operand:<VWIDE> 3
6429 "aarch64_simd_shll_imm_vec")))]
6430 "TARGET_SIMD"
6431 {@ [cons: =0, 1, 2, 3]
6432 [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6433 [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6434 }
6435 [(set_attr "type" "neon_shift_imm_long")]
6436 )
6437
6438 (define_expand "aarch64_<sur>shll2_n<mode>"
6439 [(set (match_operand:<VWIDE> 0 "register_operand")
6440 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
6441 (match_operand:SI 2
6442 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6443 VSHLL))]
6444 "TARGET_SIMD"
6445 {
6446 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6447 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6448 emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
6449 DONE;
6450 }
6451 )
6452
6453 ;; vrshr_n
6454
6455 (define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
6456 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6457 (truncate:VSDQ_I_DI
6458 (SHIFTRT:<V2XWIDE>
6459 (plus:<V2XWIDE>
6460 (<SHIFTEXTEND>:<V2XWIDE>
6461 (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
6462 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6463 (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
6464 "TARGET_SIMD
6465 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6466 "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6467 [(set_attr "type" "neon_sat_shift_imm<q>")]
6468 )
6469
6470 (define_expand "aarch64_<sra_op>rshr_n<mode>"
6471 [(match_operand:VSDQ_I_DI 0 "register_operand")
6472 (SHIFTRT:VSDQ_I_DI
6473 (match_operand:VSDQ_I_DI 1 "register_operand")
6474 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
6475 "TARGET_SIMD"
6476 {
6477 /* Use this expander to create the rounding constant vector, which is
6478 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6479 RTL is generated when handling the DImode expanders. */
6480 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6481 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6482 rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
6483 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6484 if (VECTOR_MODE_P (<MODE>mode))
6485 {
6486 shft = gen_const_vec_duplicate (<MODE>mode, shft);
6487 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6488 }
6489
6490 emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
6491 shft, rnd));
6492 DONE;
6493 }
6494 )
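
;; A worked example of the DImode case: srshr with a shift of 3 builds
;; rnd_wi = 1 << 2 = 4 as a TImode-wide constant, so the insn above
;; computes truncate ((sign_extend (x) + 4) >> 3), i.e. a rounding
;; arithmetic shift right by 3.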
6495
6496 ;; v(r)sra_n
6497
6498 (define_insn "aarch64_<sur>sra_ndi"
6499 [(set (match_operand:DI 0 "register_operand" "=w")
6500 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6501 (match_operand:DI 2 "register_operand" "w")
6502 (match_operand:SI 3
6503 "aarch64_simd_shift_imm_offset_di" "i")]
6504 VSRA))]
6505 "TARGET_SIMD"
6506 "<sur>sra\\t%d0, %d2, %3"
6507 [(set_attr "type" "neon_shift_acc")]
6508 )
6509
6510 ;; vs<lr>i_n
6511
6512 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6513 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6514 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6515 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6516 (match_operand:SI 3
6517 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6518 VSLRI))]
6519 "TARGET_SIMD"
6520 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6521 [(set_attr "type" "neon_shift_imm<q>")]
6522 )
6523
6524 ;; vqshl(u)
6525
6526 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6527 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6528 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6529 (match_operand:SI 2
6530 "aarch64_simd_shift_imm_<ve_mode>" "i")]
6531 VQSHL_N))]
6532 "TARGET_SIMD"
6533 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6534 [(set_attr "type" "neon_sat_shift_imm<q>")]
6535 )
6536
6537
6538 ;; vq(r)shr(u)n_n
6539
6540 (define_insn "aarch64_<shrn_op>shrn_n<mode>"
6541 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6542 (SAT_TRUNC:<VNARROWQ>
6543 (<TRUNC_SHIFT>:SD_HSDI
6544 (match_operand:SD_HSDI 1 "register_operand" "w")
6545 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6546 "TARGET_SIMD"
6547 "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6548 [(set_attr "type" "neon_shift_imm_narrow_q")]
6549 )
6550
6551 (define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
6552 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6553 (ALL_TRUNC:<VNARROWQ>
6554 (SHIFTRT:VQN
6555 (match_operand:VQN 1 "register_operand" "w")
6556 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6557 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6558 "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6559 [(set_attr "type" "neon_shift_imm_narrow_q")]
6560 )
6561
6562 (define_expand "aarch64_<shrn_op>shrn_n<mode>"
6563 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6564 (ALL_TRUNC:<VNARROWQ>
6565 (<TRUNC_SHIFT>:VQN
6566 (match_operand:VQN 1 "register_operand")
6567 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6568 "TARGET_SIMD"
6569 {
6570 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6571 INTVAL (operands[2]));
6572 }
6573 )
6574
6575 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
6576 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6577 (ALL_TRUNC:<VNARROWQ>
6578 (<TRUNC_SHIFT>:<V2XWIDE>
6579 (plus:<V2XWIDE>
6580 (<TRUNCEXTEND>:<V2XWIDE>
6581 (match_operand:VQN 1 "register_operand" "w"))
6582 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6583 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6584 "TARGET_SIMD
6585 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6586 "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6587 [(set_attr "type" "neon_shift_imm_narrow_q")]
6588 )
6589
6590 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
6591 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6592 (SAT_TRUNC:<VNARROWQ>
6593 (<TRUNC_SHIFT>:<DWI>
6594 (plus:<DWI>
6595 (<TRUNCEXTEND>:<DWI>
6596 (match_operand:SD_HSDI 1 "register_operand" "w"))
6597 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6598 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6599 "TARGET_SIMD
6600 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6601 "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6602 [(set_attr "type" "neon_shift_imm_narrow_q")]
6603 )
6604
6605 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6606 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6607 (SAT_TRUNC:<VNARROWQ>
6608 (<TRUNC_SHIFT>:<V2XWIDE>
6609 (plus:<V2XWIDE>
6610 (<TRUNCEXTEND>:<V2XWIDE>
6611 (match_operand:SD_HSDI 1 "register_operand"))
6612 (match_dup 3))
6613 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6614 "TARGET_SIMD"
6615 {
6616 /* Use this expander to create the rounding constant vector, which is
6617 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6618 RTL is generated when handling the DImode expanders. */
6619 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6620 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6621 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6622 }
6623 )
6624
6625 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6626 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6627 (ALL_TRUNC:<VNARROWQ>
6628 (<TRUNC_SHIFT>:<V2XWIDE>
6629 (plus:<V2XWIDE>
6630 (<TRUNCEXTEND>:<V2XWIDE>
6631 (match_operand:VQN 1 "register_operand"))
6632 (match_dup 3))
6633 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6634 "TARGET_SIMD"
6635 {
6636 if (<CODE> == TRUNCATE
6637 && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6638 {
6639 rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
6640 emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
6641 DONE;
6642 }
6643 /* Use this expander to create the rounding constant vector, which is
6644 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6645 RTL is generated when handling the DImode expanders. */
6646 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6647 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6648 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6649 operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6650 operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6651 }
6652 )
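
;; The RADDHN shortcut above relies on the identity
;;   (x + (1 << (n - 1))) >> n == raddhn (x, 0)
;; when n equals the narrow element width: e.g. for V8HI -> V8QI with a
;; shift of 8, rshrn computes (x + 128) >> 8, which is what RADDHN
;; produces when its second operand is zero.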
6653
6654 (define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
6655 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6656 (truncate:<VNARROWQ>
6657 (smin:VQN
6658 (smax:VQN
6659 (ashiftrt:VQN
6660 (match_operand:VQN 1 "register_operand" "w")
6661 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6662 (match_operand:VQN 3 "aarch64_simd_imm_zero"))
6663 (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
6664 "TARGET_SIMD"
6665 "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6666 [(set_attr "type" "neon_shift_imm_narrow_q")]
6667 )
6668
6669 (define_insn "aarch64_sqshrun_n<mode>_insn"
6670 [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
6671 (smin:SD_HSDI
6672 (smax:SD_HSDI
6673 (ashiftrt:SD_HSDI
6674 (match_operand:SD_HSDI 1 "register_operand" "w")
6675 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6676 (const_int 0))
6677 (const_int <half_mask>)))]
6678 "TARGET_SIMD"
6679 "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6680 [(set_attr "type" "neon_shift_imm_narrow_q")]
6681 )
6682
6683 (define_expand "aarch64_sqshrun_n<mode>"
6684 [(match_operand:<VNARROWQ> 0 "register_operand")
6685 (match_operand:SD_HSDI 1 "register_operand")
6686 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6687 "TARGET_SIMD"
6688 {
6689 rtx dst = gen_reg_rtx (<MODE>mode);
6690 emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
6691 operands[2]));
6692 emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6693 DONE;
6694 }
6695 )
6696
6697 (define_expand "aarch64_sqshrun_n<mode>"
6698 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6699 (truncate:<VNARROWQ>
6700 (smin:VQN
6701 (smax:VQN
6702 (ashiftrt:VQN
6703 (match_operand:VQN 1 "register_operand")
6704 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6705 (match_dup 3))
6706 (match_dup 4))))]
6707 "TARGET_SIMD"
6708 {
6709 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6710 INTVAL (operands[2]));
6711 operands[3] = CONST0_RTX (<MODE>mode);
6712 operands[4]
6713 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6714 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6715 }
6716 )
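
;; Informally, for V8HI -> V8QI with a shift of 2 the expander above
;; instantiates operands 3 and 4 so that each lane computes
;;   clamp (x >> 2, 0, 0xff)
;; which is the unsigned-saturating narrowing that SQSHRUN performs.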
6717
6718 (define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
6719 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6720 (truncate:<VNARROWQ>
6721 (smin:<V2XWIDE>
6722 (smax:<V2XWIDE>
6723 (ashiftrt:<V2XWIDE>
6724 (plus:<V2XWIDE>
6725 (sign_extend:<V2XWIDE>
6726 (match_operand:VQN 1 "register_operand" "w"))
6727 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6728 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6729 (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
6730 (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
6731 "TARGET_SIMD
6732 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6733 "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6734 [(set_attr "type" "neon_shift_imm_narrow_q")]
6735 )
6736
6737 (define_insn "aarch64_sqrshrun_n<mode>_insn"
6738 [(set (match_operand:<DWI> 0 "register_operand" "=w")
6739 (smin:<DWI>
6740 (smax:<DWI>
6741 (ashiftrt:<DWI>
6742 (plus:<DWI>
6743 (sign_extend:<DWI>
6744 (match_operand:SD_HSDI 1 "register_operand" "w"))
6745 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6746 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6747 (const_int 0))
6748 (const_int <half_mask>)))]
6749 "TARGET_SIMD
6750 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6751 "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6752 [(set_attr "type" "neon_shift_imm_narrow_q")]
6753 )
6754
6755 (define_expand "aarch64_sqrshrun_n<mode>"
6756 [(match_operand:<VNARROWQ> 0 "register_operand")
6757 (match_operand:SD_HSDI 1 "register_operand")
6758 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6759 "TARGET_SIMD"
6760 {
6761 int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
6762 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6763 rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
6764 rtx dst = gen_reg_rtx (<DWI>mode);
6765 emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
6766 emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6767 DONE;
6768 }
6769 )
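
;; A sketch of the scalar flow above for HImode with a shift of 4: the
;; value clamp ((sign_extend (x) + 8) >> 4, 0, 0xff) is computed in
;; SImode (<DWI>), and the QImode lowpart of that result is the SQRSHRUN
;; output.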
6770
6771 (define_expand "aarch64_sqrshrun_n<mode>"
6772 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6773 (truncate:<VNARROWQ>
6774 (smin:<V2XWIDE>
6775 (smax:<V2XWIDE>
6776 (ashiftrt:<V2XWIDE>
6777 (plus:<V2XWIDE>
6778 (sign_extend:<V2XWIDE>
6779 (match_operand:VQN 1 "register_operand"))
6780 (match_dup 3))
6781 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6782 (match_dup 4))
6783 (match_dup 5))))]
6784 "TARGET_SIMD"
6785 {
6786 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6787 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6788 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6789 operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6790 operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6791 operands[4] = CONST0_RTX (<V2XWIDE>mode);
6792 operands[5]
6793 = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
6794 operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
6795 }
6796 )
6797
6798 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
6799 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6800 (vec_concat:<VNARROWQ2>
6801 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6802 (ALL_TRUNC:<VNARROWQ>
6803 (SHIFTRT:VQN
6804 (match_operand:VQN 2 "register_operand" "w")
6805 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6806 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6807 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6808 "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6809 [(set_attr "type" "neon_shift_imm_narrow_q")]
6810 )
6811
6812 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
6813 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6814 (vec_concat:<VNARROWQ2>
6815 (ALL_TRUNC:<VNARROWQ>
6816 (SHIFTRT:VQN
6817 (match_operand:VQN 2 "register_operand" "w")
6818 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6819 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6820 "TARGET_SIMD && BYTES_BIG_ENDIAN
6821 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6822 "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6823 [(set_attr "type" "neon_shift_imm_narrow_q")]
6824 )
6825
6826 (define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
6827 [(match_operand:<VNARROWQ2> 0 "register_operand")
6828 (match_operand:<VNARROWQ> 1 "register_operand")
6829 (ALL_TRUNC:<VNARROWQ>
6830 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
6831 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6832 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6833 {
6834 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6835 INTVAL (operands[3]));
6836
6837 if (BYTES_BIG_ENDIAN)
6838 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
6839 operands[0], operands[1], operands[2], operands[3]));
6840 else
6841 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
6842 operands[0], operands[1], operands[2], operands[3]));
6843 DONE;
6844 }
6845 )
6846
6847 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
6848 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6849 (vec_concat:<VNARROWQ2>
6850 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6851 (ALL_TRUNC:<VNARROWQ>
6852 (<TRUNC_SHIFT>:<V2XWIDE>
6853 (plus:<V2XWIDE>
6854 (<TRUNCEXTEND>:<V2XWIDE>
6855 (match_operand:VQN 2 "register_operand" "w"))
6856 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6857 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6858 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6859 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6860 "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6861 [(set_attr "type" "neon_shift_imm_narrow_q")]
6862 )
6863
6864 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
6865 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6866 (vec_concat:<VNARROWQ2>
6867 (ALL_TRUNC:<VNARROWQ>
6868 (<TRUNC_SHIFT>:<V2XWIDE>
6869 (plus:<V2XWIDE>
6870 (<TRUNCEXTEND>:<V2XWIDE>
6871 (match_operand:VQN 2 "register_operand" "w"))
6872 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6873 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6874 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6875 "TARGET_SIMD && BYTES_BIG_ENDIAN
6876 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6877 "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6878 [(set_attr "type" "neon_shift_imm_narrow_q")]
6879 )
6880
6881 (define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
6882 [(match_operand:<VNARROWQ2> 0 "register_operand")
6883 (match_operand:<VNARROWQ> 1 "register_operand")
6884 (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
6885 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6886 "TARGET_SIMD"
6887 {
6888 if (<CODE> == TRUNCATE
6889 && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6890 {
6891 rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
6892 emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
6893 operands[2], tmp));
6894 DONE;
6895 }
6896 /* Use this expander to create the rounding constant vector, which is
6897 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6898 RTL is generated when handling the DImode expanders. */
6899 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6900 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
6901 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6902 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6903 operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
6904 if (BYTES_BIG_ENDIAN)
6905 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
6906 operands[1],
6907 operands[2],
6908 operands[3],
6909 rnd));
6910 else
6911 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
6912 operands[1],
6913 operands[2],
6914 operands[3],
6915 rnd));
6916 DONE;
6917 }
6918 )
6919
6920 (define_insn "aarch64_sqshrun2_n<mode>_insn_le"
6921 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6922 (vec_concat:<VNARROWQ2>
6923 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6924 (truncate:<VNARROWQ>
6925 (smin:VQN
6926 (smax:VQN
6927 (ashiftrt:VQN
6928 (match_operand:VQN 2 "register_operand" "w")
6929 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6930 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
6931 (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
6932 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6933 "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6934 [(set_attr "type" "neon_shift_imm_narrow_q")]
6935 )
6936
6937 (define_insn "aarch64_sqshrun2_n<mode>_insn_be"
6938 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6939 (vec_concat:<VNARROWQ2>
6940 (truncate:<VNARROWQ>
6941 (smin:VQN
6942 (smax:VQN
6943 (ashiftrt:VQN
6944 (match_operand:VQN 2 "register_operand" "w")
6945 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6946 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
6947 (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
6948 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6949 "TARGET_SIMD && BYTES_BIG_ENDIAN"
6950 "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6951 [(set_attr "type" "neon_shift_imm_narrow_q")]
6952 )
6953
6954 (define_expand "aarch64_sqshrun2_n<mode>"
6955 [(match_operand:<VNARROWQ2> 0 "register_operand")
6956 (match_operand:<VNARROWQ> 1 "register_operand")
6957 (match_operand:VQN 2 "register_operand")
6958 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6959 "TARGET_SIMD"
6960 {
6961 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6962 INTVAL (operands[3]));
6963 rtx zeros = CONST0_RTX (<MODE>mode);
6964 rtx half_umax
6965 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6966 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6967 if (BYTES_BIG_ENDIAN)
6968 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
6969 operands[1], operands[2], operands[3],
6970 zeros, half_umax));
6971 else
6972 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
6973 operands[1], operands[2], operands[3],
6974 zeros, half_umax));
6975 DONE;
6976 }
6977 )
6978
6979 (define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
6980 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6981 (vec_concat:<VNARROWQ2>
6982 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6983 (truncate:<VNARROWQ>
6984 (smin:<V2XWIDE>
6985 (smax:<V2XWIDE>
6986 (ashiftrt:<V2XWIDE>
6987 (plus:<V2XWIDE>
6988 (sign_extend:<V2XWIDE>
6989 (match_operand:VQN 2 "register_operand" "w"))
6990 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6991 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6992 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
6993 (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
6994 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6995 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6996 "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6997 [(set_attr "type" "neon_shift_imm_narrow_q")]
6998 )
6999
7000 (define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
7001 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7002 (vec_concat:<VNARROWQ2>
7003 (truncate:<VNARROWQ>
7004 (smin:<V2XWIDE>
7005 (smax:<V2XWIDE>
7006 (ashiftrt:<V2XWIDE>
7007 (plus:<V2XWIDE>
7008 (sign_extend:<V2XWIDE>
7009 (match_operand:VQN 2 "register_operand" "w"))
7010 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7011 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7012 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7013 (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
7014 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7015 "TARGET_SIMD && BYTES_BIG_ENDIAN
7016 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7017 "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7018 [(set_attr "type" "neon_shift_imm_narrow_q")]
7019 )
7020
7021 (define_expand "aarch64_sqrshrun2_n<mode>"
7022 [(match_operand:<VNARROWQ2> 0 "register_operand")
7023 (match_operand:<VNARROWQ> 1 "register_operand")
7024 (match_operand:VQN 2 "register_operand")
7025 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7026 "TARGET_SIMD"
7027 {
7028 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7029 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7030 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7031 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7032 rtx zero = CONST0_RTX (<V2XWIDE>mode);
7033 rtx half_umax
7034 = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
7035 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7036 operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7037 if (BYTES_BIG_ENDIAN)
7038 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
7039 operands[1], operands[2], operands[3], rnd,
7040 zero, half_umax));
7041 else
7042 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
7043 operands[1], operands[2], operands[3], rnd,
7044 zero, half_umax));
7045 DONE;
7046 }
7047 )
7048
7049 ;; cm(eq|ge|gt|lt|le)
7050 ;; Note, we have constraints for Dz and Z as different expanders
7051 ;; have different ideas of what should be passed to this pattern.
7052
7053 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7054 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
7055 (neg:<V_INT_EQUIV>
7056 (COMPARISONS:<V_INT_EQUIV>
7057 (match_operand:VDQ_I 1 "register_operand" "w,w")
7058 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
7059 )))]
7060 "TARGET_SIMD"
7061 "@
7062 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7063 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
7064 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
7065 )
7066
7067 (define_insn_and_split "aarch64_cm<optab>di"
7068 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
7069 (neg:DI
7070 (COMPARISONS:DI
7071 (match_operand:DI 1 "register_operand" "w,w,r")
7072 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
7073 )))
7074 (clobber (reg:CC CC_REGNUM))]
7075 "TARGET_SIMD"
7076 "#"
7077 "&& reload_completed"
7078 [(set (match_operand:DI 0 "register_operand")
7079 (neg:DI
7080 (COMPARISONS:DI
7081 (match_operand:DI 1 "register_operand")
7082 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7083 )))]
7084 {
7085 /* If we are in the general purpose register file,
7086 we split to a sequence of comparison and store. */
7087 if (GP_REGNUM_P (REGNO (operands[0]))
7088 && GP_REGNUM_P (REGNO (operands[1])))
7089 {
7090 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
7091 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7092 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7093 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7094 DONE;
7095 }
7096 /* Otherwise, we expand to a similar pattern which does not
7097 clobber CC_REGNUM. */
7098 }
7099 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
7100 )
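
;; For the general-register alternative the split above emits, roughly,
;; a compare followed by a CSETM via gen_cstoredi_neg; the vector
;; alternatives are handled by the post-reload pattern below.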
7101
7102 (define_insn "*aarch64_cm<optab>di"
7103 [(set (match_operand:DI 0 "register_operand" "=w,w")
7104 (neg:DI
7105 (COMPARISONS:DI
7106 (match_operand:DI 1 "register_operand" "w,w")
7107 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
7108 )))]
7109 "TARGET_SIMD && reload_completed"
7110 "@
7111 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
7112 cm<optab>\t%d0, %d1, #0"
7113 [(set_attr "type" "neon_compare, neon_compare_zero")]
7114 )
7115
7116 ;; cm(hs|hi)
7117
7118 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7119 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7120 (neg:<V_INT_EQUIV>
7121 (UCOMPARISONS:<V_INT_EQUIV>
7122 (match_operand:VDQ_I 1 "register_operand" "w")
7123 (match_operand:VDQ_I 2 "register_operand" "w")
7124 )))]
7125 "TARGET_SIMD"
7126 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7127 [(set_attr "type" "neon_compare<q>")]
7128 )
7129
7130 (define_insn_and_split "aarch64_cm<optab>di"
7131 [(set (match_operand:DI 0 "register_operand" "=w,r")
7132 (neg:DI
7133 (UCOMPARISONS:DI
7134 (match_operand:DI 1 "register_operand" "w,r")
7135 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
7136 )))
7137 (clobber (reg:CC CC_REGNUM))]
7138 "TARGET_SIMD"
7139 "#"
7140 "&& reload_completed"
7141 [(set (match_operand:DI 0 "register_operand")
7142 (neg:DI
7143 (UCOMPARISONS:DI
7144 (match_operand:DI 1 "register_operand")
7145 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7146 )))]
7147 {
7148 /* If we are in the general purpose register file,
7149 we split to a sequence of comparison and store. */
7150 if (GP_REGNUM_P (REGNO (operands[0]))
7151 && GP_REGNUM_P (REGNO (operands[1])))
7152 {
7153 machine_mode mode = CCmode;
7154 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7155 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7156 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7157 DONE;
7158 }
7159 /* Otherwise, we expand to a similar pattern which does not
7160 clobber CC_REGNUM. */
7161 }
7162 [(set_attr "type" "neon_compare,multiple")]
7163 )
7164
7165 (define_insn "*aarch64_cm<optab>di"
7166 [(set (match_operand:DI 0 "register_operand" "=w")
7167 (neg:DI
7168 (UCOMPARISONS:DI
7169 (match_operand:DI 1 "register_operand" "w")
7170 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
7171 )))]
7172 "TARGET_SIMD && reload_completed"
7173 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
7174 [(set_attr "type" "neon_compare")]
7175 )
7176
7177 ;; cmtst
7178
7179 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
7180 ;; we don't have any insns using ne, and aarch64_vcond outputs
7181 ;; not (neg (eq (and x y) 0))
7182 ;; which is rewritten by simplify_rtx as
7183 ;; plus (eq (and x y) 0) -1.
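;; For instance, a vector condition computing
;;   r[i] = (a[i] & b[i]) != 0 ? -1 : 0;
;; reaches combine in this plus/eq shape and is matched as a single
;; CMTST.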
7184
7185 (define_insn "aarch64_cmtst<mode><vczle><vczbe>"
7186 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7187 (plus:<V_INT_EQUIV>
7188 (eq:<V_INT_EQUIV>
7189 (and:VDQ_I
7190 (match_operand:VDQ_I 1 "register_operand" "w")
7191 (match_operand:VDQ_I 2 "register_operand" "w"))
7192 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
7193 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
7194 ]
7195 "TARGET_SIMD"
7196 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7197 [(set_attr "type" "neon_tst<q>")]
7198 )
7199
7200 ;; One can also get a cmtst by combining a
7201 ;; not (neg (eq x 0)), in which case we rewrite it to
7202 ;; a comparison of the operand against itself.
7203
7204 (define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
7205 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7206 (plus:<V_INT_EQUIV>
7207 (eq:<V_INT_EQUIV>
7208 (match_operand:VDQ_I 1 "register_operand" "w")
7209 (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
7210 (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
7211 ]
7212 "TARGET_SIMD"
7213 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
7214 [(set_attr "type" "neon_tst<q>")]
7215 )
7216
7217 (define_insn_and_split "aarch64_cmtstdi"
7218 [(set (match_operand:DI 0 "register_operand" "=w,r")
7219 (neg:DI
7220 (ne:DI
7221 (and:DI
7222 (match_operand:DI 1 "register_operand" "w,r")
7223 (match_operand:DI 2 "register_operand" "w,r"))
7224 (const_int 0))))
7225 (clobber (reg:CC CC_REGNUM))]
7226 "TARGET_SIMD"
7227 "#"
7228 "&& reload_completed"
7229 [(set (match_operand:DI 0 "register_operand")
7230 (neg:DI
7231 (ne:DI
7232 (and:DI
7233 (match_operand:DI 1 "register_operand")
7234 (match_operand:DI 2 "register_operand"))
7235 (const_int 0))))]
7236 {
7237 /* If we are in the general purpose register file,
7238 we split to a sequence of comparison and store. */
7239 if (GP_REGNUM_P (REGNO (operands[0]))
7240 && GP_REGNUM_P (REGNO (operands[1])))
7241 {
7242 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
7243 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
7244 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
7245 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
7246 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7247 DONE;
7248 }
7249 /* Otherwise, we expand to a similar pattern which does not
7250 clobber CC_REGNUM. */
7251 }
7252 [(set_attr "type" "neon_tst,multiple")]
7253 )
7254
7255 (define_insn "*aarch64_cmtstdi<vczle><vczbe>"
7256 [(set (match_operand:DI 0 "register_operand" "=w")
7257 (neg:DI
7258 (ne:DI
7259 (and:DI
7260 (match_operand:DI 1 "register_operand" "w")
7261 (match_operand:DI 2 "register_operand" "w"))
7262 (const_int 0))))]
7263 "TARGET_SIMD"
7264 "cmtst\t%d0, %d1, %d2"
7265 [(set_attr "type" "neon_tst")]
7266 )
7267
7268 ;; fcm(eq|ge|gt|le|lt)
7269
7270 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7271 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
7272 (neg:<V_INT_EQUIV>
7273 (COMPARISONS:<V_INT_EQUIV>
7274 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
7275 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
7276 )))]
7277 "TARGET_SIMD"
7278 "@
7279 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7280 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
7281 [(set_attr "type" "neon_fp_compare_<stype><q>")]
7282 )
7283
7284 ;; fac(ge|gt)
7285 ;; Note we can also handle what would be fac(le|lt) by
7286 ;; generating fac(ge|gt).
7287
7288 (define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
7289 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7290 (neg:<V_INT_EQUIV>
7291 (FAC_COMPARISONS:<V_INT_EQUIV>
7292 (abs:VHSDF_HSDF
7293 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
7294 (abs:VHSDF_HSDF
7295 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
7296 )))]
7297 "TARGET_SIMD"
7298 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7299 [(set_attr "type" "neon_fp_compare_<stype><q>")]
7300 )
7301
7302 ;; addp
7303
7304 ;; ADDP with two registers semantically concatenates them and performs
7305 ;; a pairwise addition on the result. For 128-bit input modes represent this
7306 ;; as a concatenation of the pairwise addition results of the two input
7307 ;; registers. This allows us to avoid using intermediate 256-bit modes.
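;; A concrete sketch for V4SImode: with even selector (parallel [0 2])
;; and odd selector (parallel [1 3]) the pattern computes { a0+a1, a2+a3 }
;; from operand 1 and { b0+b1, b2+b3 } from operand 2 and concatenates
;; them, which is the ADDP result.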
7308 (define_insn "aarch64_addp<mode>_insn"
7309 [(set (match_operand:VQ_I 0 "register_operand" "=w")
7310 (vec_concat:VQ_I
7311 (plus:<VHALF>
7312 (vec_select:<VHALF>
7313 (match_operand:VQ_I 1 "register_operand" "w")
7314 (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
7315 (vec_select:<VHALF>
7316 (match_dup 1)
7317 (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
7318 (plus:<VHALF>
7319 (vec_select:<VHALF>
7320 (match_operand:VQ_I 2 "register_operand" "w")
7321 (match_dup 3))
7322 (vec_select:<VHALF>
7323 (match_dup 2)
7324 (match_dup 4)))))]
7325 "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7326 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7327 [(set_attr "type" "neon_reduc_add<q>")]
7328 )
7329
7330 ;; For 64-bit input modes an ADDP is represented as a concatenation
7331 ;; of the input registers into a 128-bit register which is then fed
7332 ;; into a pairwise add. That way we avoid having to create intermediate
7333 ;; 32-bit vector modes.
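;; E.g. for V2SImode inputs a and b the concatenation is
;; { a0, a1, b0, b1 }; adding the even selection { a0, b0 } to the odd
;; selection { a1, b1 } yields { a0+a1, b0+b1 }, the 64-bit ADDP result.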
7334 (define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
7335 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
7336 (plus:VD_BHSI
7337 (vec_select:VD_BHSI
7338 (vec_concat:<VDBL>
7339 (match_operand:VD_BHSI 1 "register_operand" "w")
7340 (match_operand:VD_BHSI 2 "register_operand" "w"))
7341 (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
7342 (vec_select:VD_BHSI
7343 (vec_concat:<VDBL>
7344 (match_dup 1)
7345 (match_dup 2))
7346 (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
7347 "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7348 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7349 [(set_attr "type" "neon_reduc_add<q>")]
7350 )
7351
7352 ;; A common use case of 64-bit ADDP is to have both operands come from the same
7353 ;; 128-bit vector and produce the pairwise addition results in the lower half.
7354 ;; Split into the 128-bit ADDP form and extract the low half.
7355 (define_insn_and_split "*aarch64_addp_same_reg<mode>"
7356 [(set (match_operand:<VHALF> 0 "register_operand" "=w")
7357 (plus:<VHALF>
7358 (vec_select:<VHALF>
7359 (match_operand:VQ_I 1 "register_operand" "w")
7360 (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
7361 (vec_select:<VHALF>
7362 (match_dup 1)
7363 (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
7364 "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
7365 "#"
7366 "&& 1"
7367 [(const_int 0)]
7368 {
7369 rtx scratch;
7370 if (can_create_pseudo_p ())
7371 scratch = gen_reg_rtx (<MODE>mode);
7372 else
7373 scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);
7374
7375 emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
7376 operands[2], operands[3]));
7377 emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
7378 DONE;
7379 }
7380 )
7381
7382 (define_expand "aarch64_addp<mode>"
7383 [(match_operand:VDQ_I 0 "register_operand")
7384 (match_operand:VDQ_I 1 "register_operand")
7385 (match_operand:VDQ_I 2 "register_operand")]
7386 "TARGET_SIMD"
7387 {
7388 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
7389 if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
7390 nunits /= 2;
7391 rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
7392 rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
7393 if (BYTES_BIG_ENDIAN)
7394 std::swap (operands[1], operands[2]);
7395 emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
7396 operands[2], par_even, par_odd));
7397 DONE;
7398 }
7399 )
7400
7401 ;; sqrt
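;; aarch64_emit_approx_sqrt emits an approximate sequence (based on
;; FRSQRTE/FRSQRTS iteration) only when the tuning and fast-math-style
;; flags allow it; otherwise it returns false and the expander below
;; falls through to the FSQRT pattern.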
7402
7403 (define_expand "sqrt<mode>2"
7404 [(set (match_operand:VHSDF 0 "register_operand")
7405 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
7406 "TARGET_SIMD"
7407 {
7408 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
7409 DONE;
7410 })
7411
7412 (define_insn "*sqrt<mode>2<vczle><vczbe>"
7413 [(set (match_operand:VHSDF 0 "register_operand" "=w")
7414 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
7415 "TARGET_SIMD"
7416 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
7417 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
7418 )
7419
7420 ;; Patterns for vector struct loads and stores.
7421
7422 (define_insn "aarch64_simd_ld2<vstruct_elt>"
7423 [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
7424 (unspec:VSTRUCT_2Q [
7425 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
7426 UNSPEC_LD2))]
7427 "TARGET_SIMD"
7428 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7429 [(set_attr "type" "neon_load2_2reg<q>")]
7430 )
7431
7432 (define_insn "aarch64_simd_ld2r<vstruct_elt>"
7433 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7434 (unspec:VSTRUCT_2QD [
7435 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7436 UNSPEC_LD2_DUP))]
7437 "TARGET_SIMD"
7438 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7439 [(set_attr "type" "neon_load2_all_lanes<q>")]
7440 )
7441
7442 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7443 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7444 (unspec:VSTRUCT_2QD [
7445 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7446 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7447 (match_operand:SI 3 "immediate_operand" "i")]
7448 UNSPEC_LD2_LANE))]
7449 "TARGET_SIMD"
7450 {
7451 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7452 INTVAL (operands[3]));
7453 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7454 }
7455 [(set_attr "type" "neon_load2_one_lane")]
7456 )
7457
7458 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7459 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7460 (unspec:VSTRUCT_2Q [
7461 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7462 UNSPEC_LD2))]
7463 "TARGET_SIMD"
7464 {
7465 if (BYTES_BIG_ENDIAN)
7466 {
7467 rtx tmp = gen_reg_rtx (<MODE>mode);
7468 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7469 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7470 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7471 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7472 }
7473 else
7474 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7475 DONE;
7476 })
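
;; On big-endian targets the raw LD2 result is in architectural lane
;; order, so the expander above permutes each register back to GCC lane
;; order with aarch64_rev_reglist; the little-endian path needs no fixup.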
7477
7478 (define_insn "aarch64_simd_st2<vstruct_elt>"
7479 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7480 (unspec:VSTRUCT_2Q [
7481 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7482 UNSPEC_ST2))]
7483 "TARGET_SIMD"
7484 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7485 [(set_attr "type" "neon_store2_2reg<q>")]
7486 )
7487
7488 ;; RTL uses GCC vector extension indices, so flip only for assembly.
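;; For example, on a big-endian V4SI target GCC lane 0 is architectural
;; lane 3: aarch64_endian_lane_rtx applies that nunits - 1 - lane
;; remapping before the index is printed.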
7489 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7490 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7491 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7492 (match_operand:SI 2 "immediate_operand" "i")]
7493 UNSPEC_ST2_LANE))]
7494 "TARGET_SIMD"
7495 {
7496 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7497 INTVAL (operands[2]));
7498 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7499 }
7500 [(set_attr "type" "neon_store2_one_lane<q>")]
7501 )
7502
7503 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7504 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7505 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7506 UNSPEC_ST2))]
7507 "TARGET_SIMD"
7508 {
7509 if (BYTES_BIG_ENDIAN)
7510 {
7511 rtx tmp = gen_reg_rtx (<MODE>mode);
7512 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7513 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7514 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7515 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7516 }
7517 else
7518 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7519 DONE;
7520 })
7521
7522 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7523 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7524 (unspec:VSTRUCT_3Q [
7525 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7526 UNSPEC_LD3))]
7527 "TARGET_SIMD"
7528 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7529 [(set_attr "type" "neon_load3_3reg<q>")]
7530 )
7531
7532 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
7533 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7534 (unspec:VSTRUCT_3QD [
7535 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7536 UNSPEC_LD3_DUP))]
7537 "TARGET_SIMD"
7538 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7539 [(set_attr "type" "neon_load3_all_lanes<q>")]
7540 )
7541
7542 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7543 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7544 (unspec:VSTRUCT_3QD [
7545 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7546 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7547 (match_operand:SI 3 "immediate_operand" "i")]
7548 UNSPEC_LD3_LANE))]
7549 "TARGET_SIMD"
7550 {
7551 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7552 INTVAL (operands[3]));
7553 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7554 }
7555 [(set_attr "type" "neon_load3_one_lane")]
7556 )
7557
7558 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7559 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7560 (unspec:VSTRUCT_3Q [
7561 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7562 UNSPEC_LD3))]
7563 "TARGET_SIMD"
7564 {
7565 if (BYTES_BIG_ENDIAN)
7566 {
7567 rtx tmp = gen_reg_rtx (<MODE>mode);
7568 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7569 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7570 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7571 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7572 }
7573 else
7574 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7575 DONE;
7576 })
7577
7578 (define_insn "aarch64_simd_st3<vstruct_elt>"
7579 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
7580 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
7581 UNSPEC_ST3))]
7582 "TARGET_SIMD"
7583 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7584 [(set_attr "type" "neon_store3_3reg<q>")]
7585 )
7586
7587 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7588 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7589 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7590 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
7591 (match_operand:SI 2 "immediate_operand" "i")]
7592 UNSPEC_ST3_LANE))]
7593 "TARGET_SIMD"
7594 {
7595 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7596 INTVAL (operands[2]));
7597 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
7598 }
7599 [(set_attr "type" "neon_store3_one_lane<q>")]
7600 )
7601
7602 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7603 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
7604 (unspec:VSTRUCT_3Q [
7605 (match_operand:VSTRUCT_3Q 1 "register_operand")]
7606 UNSPEC_ST3))]
7607 "TARGET_SIMD"
7608 {
7609 if (BYTES_BIG_ENDIAN)
7610 {
7611 rtx tmp = gen_reg_rtx (<MODE>mode);
7612 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7613 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7614 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7615 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
7616 }
7617 else
7618 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
7619 DONE;
7620 })
7621
7622 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7623 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
7624 (unspec:VSTRUCT_4Q [
7625 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
7626 UNSPEC_LD4))]
7627 "TARGET_SIMD"
7628 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7629 [(set_attr "type" "neon_load4_4reg<q>")]
7630 )
7631
7632 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
7633 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7634 (unspec:VSTRUCT_4QD [
7635 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7636 UNSPEC_LD4_DUP))]
7637 "TARGET_SIMD"
7638 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7639 [(set_attr "type" "neon_load4_all_lanes<q>")]
7640 )
7641
7642 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7643 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7644 (unspec:VSTRUCT_4QD [
7645 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7646 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7647 (match_operand:SI 3 "immediate_operand" "i")]
7648 UNSPEC_LD4_LANE))]
7649 "TARGET_SIMD"
7650 {
7651 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7652 INTVAL (operands[3]));
7653 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7654 }
7655 [(set_attr "type" "neon_load4_one_lane")]
7656 )
7657
7658 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7659 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7660 (unspec:VSTRUCT_4Q [
7661 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7662 UNSPEC_LD4))]
7663 "TARGET_SIMD"
7664 {
7665 if (BYTES_BIG_ENDIAN)
7666 {
7667 rtx tmp = gen_reg_rtx (<MODE>mode);
7668 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7669 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7670 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7671 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7672 }
7673 else
7674 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7675 DONE;
7676 })
7677
7678 (define_insn "aarch64_simd_st4<vstruct_elt>"
7679 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7680 (unspec:VSTRUCT_4Q [
7681 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7682 UNSPEC_ST4))]
7683 "TARGET_SIMD"
7684 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7685 [(set_attr "type" "neon_store4_4reg<q>")]
7686 )
7687
7688 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7689 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7690 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7691 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7692 (match_operand:SI 2 "immediate_operand" "i")]
7693 UNSPEC_ST4_LANE))]
7694 "TARGET_SIMD"
7695 {
7696 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7697 INTVAL (operands[2]));
7698 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7699 }
7700 [(set_attr "type" "neon_store4_one_lane<q>")]
7701 )
7702
7703 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7704 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7705 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7706 UNSPEC_ST4))]
7707 "TARGET_SIMD"
7708 {
7709 if (BYTES_BIG_ENDIAN)
7710 {
7711 rtx tmp = gen_reg_rtx (<MODE>mode);
7712 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7713 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7714 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7715 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7716 }
7717 else
7718 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7719 DONE;
7720 })
7721
7722 (define_insn_and_split "aarch64_rev_reglist<mode>"
7723 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7724 (unspec:VSTRUCT_QD
7725 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7726 (match_operand:V16QI 2 "register_operand" "w")]
7727 UNSPEC_REV_REGLIST))]
7728 "TARGET_SIMD"
7729 "#"
7730 "&& reload_completed"
7731 [(const_int 0)]
7732 {
7733 int i;
7734 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7735 for (i = 0; i < nregs; i++)
7736 {
7737 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7738 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7739 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7740 }
7741 DONE;
7742 }
7743 [(set_attr "type" "neon_tbl1_q")
7744 (set_attr "length" "<insn_count>")]
7745 )
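
;; E.g. for a two-register (2Q) list the split above emits two TBL
;; instructions, one per 128-bit register, each permuting its bytes with
;; the mask built by aarch64_reverse_mask.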
7746
7747 ;; Reload patterns for AdvSIMD register list operands.
7748
7749 (define_expand "mov<mode>"
7750 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7751 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7752 "TARGET_FLOAT"
7753 {
7754 if (can_create_pseudo_p ())
7755 {
7756 if (GET_CODE (operands[0]) != REG)
7757 operands[1] = force_reg (<MODE>mode, operands[1]);
7758 }
7759 })
7760
7761 (define_expand "mov<mode>"
7762 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7763 (match_operand:VSTRUCT 1 "general_operand"))]
7764 "TARGET_FLOAT"
7765 {
7766 if (can_create_pseudo_p ())
7767 {
7768 if (GET_CODE (operands[0]) != REG)
7769 operands[1] = force_reg (<MODE>mode, operands[1]);
7770 }
7771 })
7772
7773 (define_expand "movv8di"
7774 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7775 (match_operand:V8DI 1 "general_operand"))]
7776 ""
7777 {
7778 if (can_create_pseudo_p () && MEM_P (operands[0]))
7779 operands[1] = force_reg (V8DImode, operands[1]);
7780 })
7781
7782 (define_expand "aarch64_ld1x3<vstruct_elt>"
7783 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7784 (match_operand:DI 1 "register_operand")]
7785 "TARGET_SIMD"
7786 {
7787 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7788 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7789 DONE;
7790 })
7791
7792 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7793 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7794 (unspec:VSTRUCT_3QD
7795 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7796 UNSPEC_LD1))]
7797 "TARGET_SIMD"
7798 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7799 [(set_attr "type" "neon_load1_3reg<q>")]
7800 )
7801
7802 (define_expand "aarch64_ld1x4<vstruct_elt>"
7803 [(match_operand:VSTRUCT_4QD 0 "register_operand")
7804 (match_operand:DI 1 "register_operand")]
7805 "TARGET_SIMD"
7806 {
7807 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7808 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7809 DONE;
7810 })
7811
7812 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7813 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7814 (unspec:VSTRUCT_4QD
7815 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7816 UNSPEC_LD1))]
7817 "TARGET_SIMD"
7818 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7819 [(set_attr "type" "neon_load1_4reg<q>")]
7820 )
7821
7822 (define_expand "aarch64_st1x2<vstruct_elt>"
7823 [(match_operand:DI 0 "register_operand")
7824 (match_operand:VSTRUCT_2QD 1 "register_operand")]
7825 "TARGET_SIMD"
7826 {
7827 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7828 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7829 DONE;
7830 })
7831
7832 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7833 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7834 (unspec:VSTRUCT_2QD
7835 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7836 UNSPEC_ST1))]
7837 "TARGET_SIMD"
7838 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7839 [(set_attr "type" "neon_store1_2reg<q>")]
7840 )
7841
7842 (define_expand "aarch64_st1x3<vstruct_elt>"
7843 [(match_operand:DI 0 "register_operand")
7844 (match_operand:VSTRUCT_3QD 1 "register_operand")]
7845 "TARGET_SIMD"
7846 {
7847 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7848 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7849 DONE;
7850 })
7851
7852 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7853 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7854 (unspec:VSTRUCT_3QD
7855 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7856 UNSPEC_ST1))]
7857 "TARGET_SIMD"
7858 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7859 [(set_attr "type" "neon_store1_3reg<q>")]
7860 )
7861
7862 (define_expand "aarch64_st1x4<vstruct_elt>"
7863 [(match_operand:DI 0 "register_operand")
7864 (match_operand:VSTRUCT_4QD 1 "register_operand")]
7865 "TARGET_SIMD"
7866 {
7867 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7868 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7869 DONE;
7870 })
7871
7872 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7873 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7874 (unspec:VSTRUCT_4QD
7875 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7876 UNSPEC_ST1))]
7877 "TARGET_SIMD"
7878 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7879 [(set_attr "type" "neon_store1_4reg<q>")]
7880 )
7881
7882 (define_insn "*aarch64_mov<mode>"
7883 [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7884 (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7885 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7886 && (register_operand (operands[0], <MODE>mode)
7887 || register_operand (operands[1], <MODE>mode))"
7888 "@
7889 #
7890 st1\\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7891 ld1\\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1"
7892 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7893 neon_load<nregs>_<nregs>reg_q")
7894 (set_attr "length" "<insn_count>,4,4")]
7895 )
7896
7897 (define_insn "*aarch64_mov<mode>"
7898 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7899 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7900 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7901 && (register_operand (operands[0], <MODE>mode)
7902 || register_operand (operands[1], <MODE>mode))"
7903 "@
7904 #
7905 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
7906 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
7907 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7908 neon_load<nregs>_<nregs>reg_q")
7909 (set_attr "length" "<insn_count>,4,4")]
7910 )
7911
7912 (define_insn "*aarch64_movv8di"
7913 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7914 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7915 "(register_operand (operands[0], V8DImode)
7916 || register_operand (operands[1], V8DImode))"
7917 "#"
7918 [(set_attr "type" "multiple,multiple,multiple")
7919 (set_attr "length" "32,16,16")]
7920 )
7921
7922 (define_insn "aarch64_be_ld1<mode>"
7923 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
7924 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7925 "aarch64_simd_struct_operand" "Utv")]
7926 UNSPEC_LD1))]
7927 "TARGET_SIMD"
7928 "ld1\\t{%0<Vmtype>}, %1"
7929 [(set_attr "type" "neon_load1_1reg<q>")]
7930 )
7931
7932 (define_insn "aarch64_be_st1<mode>"
7933 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7934 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7935 UNSPEC_ST1))]
7936 "TARGET_SIMD"
7937 "st1\\t{%1<Vmtype>}, %0"
7938 [(set_attr "type" "neon_store1_1reg<q>")]
7939 )
7940
7941 (define_insn "*aarch64_be_mov<mode>"
7942 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
7943 (match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
7944 "TARGET_FLOAT
7945 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7946 && (register_operand (operands[0], <MODE>mode)
7947 || register_operand (operands[1], <MODE>mode))"
7948 "@
7949 #
7950 stp\\t%d1, %R1, %0
7951 ldp\\t%d0, %R0, %1"
7952 [(set_attr "type" "multiple,neon_stp,neon_ldp")
7953 (set_attr "length" "8,4,4")]
7954 )
7955
7956 (define_insn "*aarch64_be_mov<mode>"
7957 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
7958 (match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
7959 "TARGET_FLOAT
7960 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7961 && (register_operand (operands[0], <MODE>mode)
7962 || register_operand (operands[1], <MODE>mode))"
7963 "@
7964 #
7965 stp\\t%q1, %R1, %0
7966 ldp\\t%q0, %R0, %1"
7967 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7968 (set_attr "arch" "simd,*,*")
7969 (set_attr "length" "8,4,4")]
7970 )
7971
7972 (define_insn "*aarch64_be_movoi"
7973 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
7974 (match_operand:OI 1 "general_operand" " w,w,m"))]
7975 "TARGET_FLOAT
7976 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7977 && (register_operand (operands[0], OImode)
7978 || register_operand (operands[1], OImode))"
7979 "@
7980 #
7981 stp\\t%q1, %R1, %0
7982 ldp\\t%q0, %R0, %1"
7983 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7984 (set_attr "arch" "simd,*,*")
7985 (set_attr "length" "8,4,4")]
7986 )
7987
7988 (define_insn "*aarch64_be_mov<mode>"
7989 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
7990 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
7991 "TARGET_FLOAT
7992 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7993 && (register_operand (operands[0], <MODE>mode)
7994 || register_operand (operands[1], <MODE>mode))"
7995 "#"
7996 [(set_attr "type" "multiple")
7997 (set_attr "arch" "fp<q>,*,*")
7998 (set_attr "length" "12,8,8")]
7999 )
8000
8001 (define_insn "*aarch64_be_movci"
8002 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
8003 (match_operand:CI 1 "general_operand" " w,w,o"))]
8004 "TARGET_FLOAT
8005 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8006 && (register_operand (operands[0], CImode)
8007 || register_operand (operands[1], CImode))"
8008 "#"
8009 [(set_attr "type" "multiple")
8010 (set_attr "arch" "simd,*,*")
8011 (set_attr "length" "12,8,8")]
8012 )
8013
8014 (define_insn "*aarch64_be_mov<mode>"
8015 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
8016 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
8017 "TARGET_FLOAT
8018 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8019 && (register_operand (operands[0], <MODE>mode)
8020 || register_operand (operands[1], <MODE>mode))"
8021 "#"
8022 [(set_attr "type" "multiple")
8023 (set_attr "arch" "fp<q>,*,*")
8024 (set_attr "length" "16,8,8")]
8025 )
8026
8027 (define_insn "*aarch64_be_movxi"
8028 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
8029 (match_operand:XI 1 "general_operand" " w,w,o"))]
8030 "TARGET_FLOAT
8031 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8032 && (register_operand (operands[0], XImode)
8033 || register_operand (operands[1], XImode))"
8034 "#"
8035 [(set_attr "type" "multiple")
8036 (set_attr "arch" "simd,*,*")
8037 (set_attr "length" "16,8,8")]
8038 )
8039
8040 (define_split
8041 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
8042 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
8043 "TARGET_FLOAT && reload_completed"
8044 [(const_int 0)]
8045 {
8046 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
8047 DONE;
8048 })
8049
8050 (define_split
8051 [(set (match_operand:OI 0 "register_operand")
8052 (match_operand:OI 1 "register_operand"))]
8053 "TARGET_FLOAT && reload_completed"
8054 [(const_int 0)]
8055 {
8056 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
8057 DONE;
8058 })
8059
8060 (define_split
8061 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
8062 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
8063 "TARGET_FLOAT && reload_completed"
8064 [(const_int 0)]
8065 {
8066 if (register_operand (operands[0], <MODE>mode)
8067 && register_operand (operands[1], <MODE>mode))
8068 {
8069 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
8070 DONE;
8071 }
8072 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8073 {
8074 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8075 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8076 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8077 <MODE>mode, 0),
8078 simplify_gen_subreg (pair_mode, operands[1],
8079 <MODE>mode, 0));
8080 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8081 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8082 operands[0],
8083 <MODE>mode,
8084 2 * elt_size)),
8085 gen_lowpart (<VSTRUCT_ELT>mode,
8086 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8087 operands[1],
8088 <MODE>mode,
8089 2 * elt_size)));
8090 DONE;
8091 }
8092 else
8093 FAIL;
8094 })
8095
8096 (define_split
8097 [(set (match_operand:CI 0 "nonimmediate_operand")
8098 (match_operand:CI 1 "general_operand"))]
8099 "TARGET_FLOAT && reload_completed"
8100 [(const_int 0)]
8101 {
8102 if (register_operand (operands[0], CImode)
8103 && register_operand (operands[1], CImode))
8104 {
8105 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
8106 DONE;
8107 }
8108 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8109 {
8110 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
8111 simplify_gen_subreg (OImode, operands[1], CImode, 0));
8112 emit_move_insn (gen_lowpart (V16QImode,
8113 simplify_gen_subreg (TImode, operands[0],
8114 CImode, 32)),
8115 gen_lowpart (V16QImode,
8116 simplify_gen_subreg (TImode, operands[1],
8117 CImode, 32)));
8118 DONE;
8119 }
8120 else
8121 FAIL;
8122 })
8123
8124 (define_split
8125 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
8126 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
8127 "TARGET_FLOAT && reload_completed"
8128 [(const_int 0)]
8129 {
8130 if (register_operand (operands[0], <MODE>mode)
8131 && register_operand (operands[1], <MODE>mode))
8132 {
8133 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
8134 DONE;
8135 }
8136 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8137 {
8138 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8139 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8140 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8141 <MODE>mode, 0),
8142 simplify_gen_subreg (pair_mode, operands[1],
8143 <MODE>mode, 0));
8144 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8145 <MODE>mode, 2 * elt_size),
8146 simplify_gen_subreg (pair_mode, operands[1],
8147 <MODE>mode, 2 * elt_size));
8148 DONE;
8149 }
8150 else
8151 FAIL;
8152 })
8153
8154 (define_split
8155 [(set (match_operand:XI 0 "nonimmediate_operand")
8156 (match_operand:XI 1 "general_operand"))]
8157 "TARGET_FLOAT && reload_completed"
8158 [(const_int 0)]
8159 {
8160 if (register_operand (operands[0], XImode)
8161 && register_operand (operands[1], XImode))
8162 {
8163 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
8164 DONE;
8165 }
8166 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8167 {
8168 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
8169 simplify_gen_subreg (OImode, operands[1], XImode, 0));
8170 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
8171 simplify_gen_subreg (OImode, operands[1], XImode, 32));
8172 DONE;
8173 }
8174 else
8175 FAIL;
8176 })
8177
8178 (define_split
8179 [(set (match_operand:V8DI 0 "nonimmediate_operand")
8180 (match_operand:V8DI 1 "general_operand"))]
8181 "reload_completed"
8182 [(const_int 0)]
8183 {
8184 if (register_operand (operands[0], V8DImode)
8185 && register_operand (operands[1], V8DImode))
8186 {
8187 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
8188 DONE;
8189 }
8190 else if ((register_operand (operands[0], V8DImode)
8191 && memory_operand (operands[1], V8DImode))
8192 || (memory_operand (operands[0], V8DImode)
8193 && register_operand (operands[1], V8DImode)))
8194 {
8195 for (int offset = 0; offset < 64; offset += 16)
8196 emit_move_insn (simplify_gen_subreg (TImode, operands[0],
8197 V8DImode, offset),
8198 simplify_gen_subreg (TImode, operands[1],
8199 V8DImode, offset));
8200 DONE;
8201 }
8202 else
8203 FAIL;
8204 })
8205
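;; The LD<n>R expander below describes its memory operand as BLKmode and
;; records only the accessed size: the instruction loads one element per
;; destination register (<nregs> elements in total) and replicates it to
;; every lane, so no full vector-structure mode describes the access.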
8206 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8207 [(match_operand:VSTRUCT_QD 0 "register_operand")
8208 (match_operand:DI 1 "register_operand")]
8209 "TARGET_SIMD"
8210 {
8211 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8212 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8213
8214 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
8215 DONE;
8216 })
8217
8218 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8219 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
8220 (unspec:VSTRUCT_2DNX [
8221 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
8222 UNSPEC_LD2_DREG))]
8223 "TARGET_SIMD"
8224 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8225 [(set_attr "type" "neon_load2_2reg<q>")]
8226 )
8227
8228 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8229 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
8230 (unspec:VSTRUCT_2DX [
8231 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
8232 UNSPEC_LD2_DREG))]
8233 "TARGET_SIMD"
8234 "ld1\\t{%S0.1d - %T0.1d}, %1"
8235 [(set_attr "type" "neon_load1_2reg<q>")]
8236 )
8237
8238 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8239 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
8240 (unspec:VSTRUCT_3DNX [
8241 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
8242 UNSPEC_LD3_DREG))]
8243 "TARGET_SIMD"
8244 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8245 [(set_attr "type" "neon_load3_3reg<q>")]
8246 )
8247
8248 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8249 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
8250 (unspec:VSTRUCT_3DX [
8251 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
8252 UNSPEC_LD3_DREG))]
8253 "TARGET_SIMD"
8254 "ld1\\t{%S0.1d - %U0.1d}, %1"
8255 [(set_attr "type" "neon_load1_3reg<q>")]
8256 )
8257
8258 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8259 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
8260 (unspec:VSTRUCT_4DNX [
8261 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
8262 UNSPEC_LD4_DREG))]
8263 "TARGET_SIMD"
8264 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8265 [(set_attr "type" "neon_load4_4reg<q>")]
8266 )
8267
8268 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8269 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
8270 (unspec:VSTRUCT_4DX [
8271 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
8272 UNSPEC_LD4_DREG))]
8273 "TARGET_SIMD"
8274 "ld1\\t{%S0.1d - %V0.1d}, %1"
8275 [(set_attr "type" "neon_load1_4reg<q>")]
8276 )
8277
8278 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8279 [(match_operand:VSTRUCT_D 0 "register_operand")
8280 (match_operand:DI 1 "register_operand")]
8281 "TARGET_SIMD"
8282 {
8283 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8284 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
8285 DONE;
8286 })
8287
8288 (define_expand "aarch64_ld1<VALL_F16:mode>"
8289 [(match_operand:VALL_F16 0 "register_operand")
8290 (match_operand:DI 1 "register_operand")]
8291 "TARGET_SIMD"
8292 {
8293 machine_mode mode = <VALL_F16:MODE>mode;
8294 rtx mem = gen_rtx_MEM (mode, operands[1]);
8295
8296 if (BYTES_BIG_ENDIAN)
8297 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
8298 else
8299 emit_move_insn (operands[0], mem);
8300 DONE;
8301 })
8302
8303 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8304 [(match_operand:VSTRUCT_Q 0 "register_operand")
8305 (match_operand:DI 1 "register_operand")]
8306 "TARGET_SIMD"
8307 {
8308 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8309 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
8310 DONE;
8311 })
8312
8313 (define_expand "aarch64_ld1x2<vstruct_elt>"
8314 [(match_operand:VSTRUCT_2QD 0 "register_operand")
8315 (match_operand:DI 1 "register_operand")]
8316 "TARGET_SIMD"
8317 {
8318 machine_mode mode = <MODE>mode;
8319 rtx mem = gen_rtx_MEM (mode, operands[1]);
8320
8321 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
8322 DONE;
8323 })
8324
8325 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
8326 [(match_operand:VSTRUCT_QD 0 "register_operand")
8327 (match_operand:DI 1 "register_operand")
8328 (match_operand:VSTRUCT_QD 2 "register_operand")
8329 (match_operand:SI 3 "immediate_operand")]
8330 "TARGET_SIMD"
8331 {
8332 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8333 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8334
8335 aarch64_simd_lane_bounds (operands[3], 0,
8336 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8337 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
8338 mem, operands[2], operands[3]));
8339 DONE;
8340 })
8341
8342 ;; Permuted-store expanders for neon intrinsics.
8343
8344 ;; Permute instructions
8345
8346 ;; vec_perm support
8347
8348 (define_expand "vec_perm<mode>"
8349 [(match_operand:VB 0 "register_operand")
8350 (match_operand:VB 1 "register_operand")
8351 (match_operand:VB 2 "register_operand")
8352 (match_operand:VB 3 "register_operand")]
8353 "TARGET_SIMD"
8354 {
8355 aarch64_expand_vec_perm (operands[0], operands[1],
8356 operands[2], operands[3], <nunits>);
8357 DONE;
8358 })
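
;; As an illustrative sketch (not part of the sources), a fully variable
;; two-input shuffle such as:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t
;;   shuffle (uint8x16_t a, uint8x16_t b, uint8x16_t sel)
;;   {
;;     return __builtin_shuffle (a, b, sel);
;;   }
;;
;; is routed through this expander; aarch64_expand_vec_perm concatenates
;; the two inputs into a register pair and selects bytes with a
;; two-register TBL.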
8359
8360 (define_insn "aarch64_qtbl1<mode>"
8361 [(set (match_operand:VB 0 "register_operand" "=w")
8362 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
8363 (match_operand:VB 2 "register_operand" "w")]
8364 UNSPEC_TBL))]
8365 "TARGET_SIMD"
8366 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
8367 [(set_attr "type" "neon_tbl1<q>")]
8368 )
8369
8370 (define_insn "aarch64_qtbx1<mode>"
8371 [(set (match_operand:VB 0 "register_operand" "=w")
8372 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8373 (match_operand:V16QI 2 "register_operand" "w")
8374 (match_operand:VB 3 "register_operand" "w")]
8375 UNSPEC_TBX))]
8376 "TARGET_SIMD"
8377 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
8378 [(set_attr "type" "neon_tbl1<q>")]
8379 )
8380
8381 ;; Two source registers.
8382
8383 (define_insn "aarch64_qtbl2<mode>"
8384 [(set (match_operand:VB 0 "register_operand" "=w")
8385 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
8386 (match_operand:VB 2 "register_operand" "w")]
8387 UNSPEC_TBL))]
8388 "TARGET_SIMD"
8389 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8390 [(set_attr "type" "neon_tbl2")]
8391 )
8392
8393 (define_insn "aarch64_qtbx2<mode>"
8394 [(set (match_operand:VB 0 "register_operand" "=w")
8395 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8396 (match_operand:V2x16QI 2 "register_operand" "w")
8397 (match_operand:VB 3 "register_operand" "w")]
8398 UNSPEC_TBX))]
8399 "TARGET_SIMD"
8400 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8401 [(set_attr "type" "neon_tbl2")]
8402 )
8403
8404 ;; Three source registers.
8405
8406 (define_insn "aarch64_qtbl3<mode>"
8407 [(set (match_operand:VB 0 "register_operand" "=w")
8408 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
8409 (match_operand:VB 2 "register_operand" "w")]
8410 UNSPEC_TBL))]
8411 "TARGET_SIMD"
8412 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8413 [(set_attr "type" "neon_tbl3")]
8414 )
8415
8416 (define_insn "aarch64_qtbx3<mode>"
8417 [(set (match_operand:VB 0 "register_operand" "=w")
8418 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8419 (match_operand:V3x16QI 2 "register_operand" "w")
8420 (match_operand:VB 3 "register_operand" "w")]
8421 UNSPEC_TBX))]
8422 "TARGET_SIMD"
8423 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8424 [(set_attr "type" "neon_tbl3")]
8425 )
8426
8427 ;; Four source registers.
8428
8429 (define_insn "aarch64_qtbl4<mode>"
8430 [(set (match_operand:VB 0 "register_operand" "=w")
8431 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8432 (match_operand:VB 2 "register_operand" "w")]
8433 UNSPEC_TBL))]
8434 "TARGET_SIMD"
8435 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8436 [(set_attr "type" "neon_tbl4")]
8437 )
8438
8439 (define_insn "aarch64_qtbx4<mode>"
8440 [(set (match_operand:VB 0 "register_operand" "=w")
8441 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8442 (match_operand:V4x16QI 2 "register_operand" "w")
8443 (match_operand:VB 3 "register_operand" "w")]
8444 UNSPEC_TBX))]
8445 "TARGET_SIMD"
8446 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8447 [(set_attr "type" "neon_tbl4")]
8448 )
8449
8450 (define_insn_and_split "aarch64_combinev16qi"
8451 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
8452 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
8453 (match_operand:V16QI 2 "register_operand" "w")]
8454 UNSPEC_CONCAT))]
8455 "TARGET_SIMD"
8456 "#"
8457 "&& reload_completed"
8458 [(const_int 0)]
8459 {
8460 aarch64_split_combinev16qi (operands);
8461 DONE;
8462 }
8463 [(set_attr "type" "multiple")]
8464 )
8465
8466 ;; This instruction's pattern is generated directly by
8467 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8468 ;; need corresponding changes there.
8469 (define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8470 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8471 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8472 (match_operand:VALL_F16 2 "register_operand" "w")]
8473 PERMUTE))]
8474 "TARGET_SIMD"
8475 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8476 [(set_attr "type" "neon_permute<q>")]
8477 )
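
;; A worked example: the constant permute {0, 4, 1, 5} on two V4SI inputs
;; interleaves the low halves, so aarch64_expand_vec_perm_const generates
;; this pattern with UNSPEC_ZIP1 and it assembles to
;; "zip1 v0.4s, v1.4s, v2.4s".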
8478
8479 ;; This instruction's pattern is generated directly by
8480 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8481 ;; need corresponding changes there. Note that the immediate (third)
8482 ;; operand is a lane index, not a byte index.
8483 (define_insn "aarch64_ext<mode>"
8484 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8485 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8486 (match_operand:VALL_F16 2 "register_operand" "w")
8487 (match_operand:SI 3 "immediate_operand" "i")]
8488 UNSPEC_EXT))]
8489 "TARGET_SIMD"
8490 {
8491 operands[3] = GEN_INT (INTVAL (operands[3])
8492 * GET_MODE_UNIT_SIZE (<MODE>mode));
8493 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
8494 }
8495 [(set_attr "type" "neon_ext<q>")]
8496 )
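
;; For example, GET_MODE_UNIT_SIZE of V4SImode is 4, so a lane index of 1
;; becomes the byte immediate #4 and the pattern assembles to
;; "ext v0.16b, v1.16b, v2.16b, #4", selecting 16 bytes of the
;; concatenated inputs starting at byte 4.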
8497
8498 ;; This instruction's pattern is generated directly by
8499 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8500 ;; need corresponding changes there.
8501 (define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8502 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8503 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
8504 REVERSE))]
8505 "TARGET_SIMD"
8506 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
8507 [(set_attr "type" "neon_rev<q>")]
8508 )
8509
8510 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8511 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
8512 (unspec:VSTRUCT_2DNX [
8513 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
8514 UNSPEC_ST2))]
8515 "TARGET_SIMD"
8516 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8517 [(set_attr "type" "neon_store2_2reg")]
8518 )
8519
8520 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8521 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
8522 (unspec:VSTRUCT_2DX [
8523 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
8524 UNSPEC_ST2))]
8525 "TARGET_SIMD"
8526 "st1\\t{%S1.1d - %T1.1d}, %0"
8527 [(set_attr "type" "neon_store1_2reg")]
8528 )
8529
8530 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8531 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
8532 (unspec:VSTRUCT_3DNX [
8533 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
8534 UNSPEC_ST3))]
8535 "TARGET_SIMD"
8536 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8537 [(set_attr "type" "neon_store3_3reg")]
8538 )
8539
8540 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8541 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
8542 (unspec:VSTRUCT_3DX [
8543 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
8544 UNSPEC_ST3))]
8545 "TARGET_SIMD"
8546 "st1\\t{%S1.1d - %U1.1d}, %0"
8547 [(set_attr "type" "neon_store1_3reg")]
8548 )
8549
8550 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8551 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
8552 (unspec:VSTRUCT_4DNX [
8553 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
8554 UNSPEC_ST4))]
8555 "TARGET_SIMD"
8556 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8557 [(set_attr "type" "neon_store4_4reg")]
8558 )
8559
8560 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8561 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
8562 (unspec:VSTRUCT_4DX [
8563 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
8564 UNSPEC_ST4))]
8565 "TARGET_SIMD"
8566 "st1\\t{%S1.1d - %V1.1d}, %0"
8567 [(set_attr "type" "neon_store1_4reg")]
8568 )
8569
8570 (define_expand "aarch64_st<nregs><vstruct_elt>"
8571 [(match_operand:DI 0 "register_operand")
8572 (match_operand:VSTRUCT_D 1 "register_operand")]
8573 "TARGET_SIMD"
8574 {
8575 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8576 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
8577 DONE;
8578 })
8579
8580 (define_expand "aarch64_st<nregs><vstruct_elt>"
8581 [(match_operand:DI 0 "register_operand")
8582 (match_operand:VSTRUCT_Q 1 "register_operand")]
8583 "TARGET_SIMD"
8584 {
8585 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8586 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
8587 DONE;
8588 })
8589
8590 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
8591 [(match_operand:DI 0 "register_operand")
8592 (match_operand:VSTRUCT_QD 1 "register_operand")
8593 (match_operand:SI 2 "immediate_operand")]
8594 "TARGET_SIMD"
8595 {
8596 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
8597 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8598
8599 aarch64_simd_lane_bounds (operands[2], 0,
8600 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8601 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8602 operands[1], operands[2]));
8603 DONE;
8604 })
8605
8606 (define_expand "aarch64_st1<VALL_F16:mode>"
8607 [(match_operand:DI 0 "register_operand")
8608 (match_operand:VALL_F16 1 "register_operand")]
8609 "TARGET_SIMD"
8610 {
8611 machine_mode mode = <VALL_F16:MODE>mode;
8612 rtx mem = gen_rtx_MEM (mode, operands[0]);
8613
8614 if (BYTES_BIG_ENDIAN)
8615 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
8616 else
8617 emit_move_insn (mem, operands[1]);
8618 DONE;
8619 })
8620
8621 ;; Standard pattern name vec_init<mode><Vel>.
8622
8623 (define_expand "vec_init<mode><Vel>"
8624 [(match_operand:VALL_F16 0 "register_operand")
8625 (match_operand 1 "" "")]
8626 "TARGET_SIMD"
8627 {
8628 aarch64_expand_vector_init (operands[0], operands[1]);
8629 DONE;
8630 })
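
;; A hedged illustration: a vector constructor such as
;;
;;   #include <arm_neon.h>
;;   int32x4_t
;;   make (int32_t a, int32_t b, int32_t c, int32_t d)
;;   {
;;     return (int32x4_t) {a, b, c, d};
;;   }
;;
;; reaches this expander, and aarch64_expand_vector_init typically builds
;; the vector with an initial DUP followed by INS of the remaining lanes.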
8631
8632 (define_expand "vec_init<mode><Vhalf>"
8633 [(match_operand:VQ_NO2E 0 "register_operand")
8634 (match_operand 1 "" "")]
8635 "TARGET_SIMD"
8636 {
8637 aarch64_expand_vector_init (operands[0], operands[1]);
8638 DONE;
8639 })
8640
8641 (define_insn "*aarch64_simd_ld1r<mode>"
8642 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8643 (vec_duplicate:VALL_F16
8644 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
8645 "TARGET_SIMD"
8646 "ld1r\\t{%0.<Vtype>}, %1"
8647 [(set_attr "type" "neon_load1_all_lanes")]
8648 )
8649
8650 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8651 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
8652 (unspec:VSTRUCT_2QD [
8653 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
8654 UNSPEC_LD1))]
8655 "TARGET_SIMD"
8656 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8657 [(set_attr "type" "neon_load1_2reg<q>")]
8658 )
8659
8660
8661 (define_insn "@aarch64_frecpe<mode>"
8662 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8663 (unspec:VHSDF_HSDF
8664 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8665 UNSPEC_FRECPE))]
8666 "TARGET_SIMD"
8667 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8668 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8669 )
8670
8671 (define_insn "aarch64_frecpx<mode>"
8672 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8673 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8674 UNSPEC_FRECPX))]
8675 "TARGET_SIMD"
8676 "frecpx\t%<s>0, %<s>1"
8677 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8678 )
8679
8680 (define_insn "@aarch64_frecps<mode>"
8681 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8682 (unspec:VHSDF_HSDF
8683 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8684 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8685 UNSPEC_FRECPS))]
8686 "TARGET_SIMD"
8687 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8688 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8689 )
8690
8691 (define_insn "aarch64_urecpe<mode>"
8692 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8693 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8694 UNSPEC_URECPE))]
8695 "TARGET_SIMD"
8696 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8697 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8698
8699 ;; Standard pattern name vec_extract<mode><Vel>.
8700
8701 (define_expand "vec_extract<mode><Vel>"
8702 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8703 (match_operand:VALL_F16 1 "register_operand")
8704 (match_operand:SI 2 "immediate_operand")]
8705 "TARGET_SIMD"
8706 {
8707 emit_insn
8708 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8709 DONE;
8710 })
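
;; For instance, vgetq_lane_s32 (v, 2) is expanded through this pattern
;; and typically assembles to "umov w0, v0.s[2]".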
8711
8712 ;; Extract a 64-bit vector from one half of a 128-bit vector.
8713 (define_expand "vec_extract<mode><Vhalf>"
8714 [(match_operand:<VHALF> 0 "register_operand")
8715 (match_operand:VQMOV_NO2E 1 "register_operand")
8716 (match_operand 2 "immediate_operand")]
8717 "TARGET_SIMD"
8718 {
8719 int start = INTVAL (operands[2]);
8720 gcc_assert (start == 0 || start == 1);
8721 start *= <nunits> / 2;
8722 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8723 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8724 DONE;
8725 })
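
;; Worked example: for V8HImode, <nunits> is 8, so operand 2 == 1 gives
;; start = 4 and the stepped parallel [4 5 6 7], i.e. the high half of
;; the 128-bit input.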
8726
8727 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8728 (define_expand "vec_extract<mode><V1half>"
8729 [(match_operand:<V1HALF> 0 "register_operand")
8730 (match_operand:VQ_2E 1 "register_operand")
8731 (match_operand 2 "immediate_operand")]
8732 "TARGET_SIMD"
8733 {
8734 /* V1DI and V1DF are rarely used by other patterns, so it is better
8735 to hide them in a subreg destination of a normal DI or DF op. */
8736 rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
8737 emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
8738 DONE;
8739 })
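
;; E.g. for V2DImode, <V1HALF> is V1DI and <VHALF> is DI, so the V1DI
;; result is written through a DI subreg and the standard DI vec_extract
;; expander above does the work.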
8740
8741 ;; aes
8742
8743 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8744 [(set (match_operand:V16QI 0 "register_operand" "=w")
8745 (unspec:V16QI
8746 [(xor:V16QI
8747 (match_operand:V16QI 1 "register_operand" "%0")
8748 (match_operand:V16QI 2 "register_operand" "w"))]
8749 CRYPTO_AES))]
8750 "TARGET_AES"
8751 "aes<aes_op>\\t%0.16b, %2.16b"
8752 [(set_attr "type" "crypto_aese")]
8753 )
8754
8755 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8756 [(set (match_operand:V16QI 0 "register_operand" "=w")
8757 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8758 CRYPTO_AESMC))]
8759 "TARGET_AES"
8760 "aes<aesmc_op>\\t%0.16b, %1.16b"
8761 [(set_attr "type" "crypto_aesmc")]
8762 )
8763
8764 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8765 ;; and enforce the register dependency without scheduling or register
8766 ;; allocation messing up the order or introducing moves in between.
8767 ;; Mash the two together during combine.
8768
8769 (define_insn "*aarch64_crypto_aese_fused"
8770 [(set (match_operand:V16QI 0 "register_operand" "=w")
8771 (unspec:V16QI
8772 [(unspec:V16QI
8773 [(xor:V16QI
8774 (match_operand:V16QI 1 "register_operand" "%0")
8775 (match_operand:V16QI 2 "register_operand" "w"))]
8776 UNSPEC_AESE)]
8777 UNSPEC_AESMC))]
8778 "TARGET_AES
8779 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8780 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8781 [(set_attr "type" "crypto_aese")
8782 (set_attr "length" "8")]
8783 )
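
;; A hedged usage sketch: with the "aes" extension enabled, back-to-back
;; rounds written as
;;
;;   #include <arm_neon.h>
;;   uint8x16_t
;;   round (uint8x16_t data, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (data, key));
;;   }
;;
;; combine into this fused pattern on cores that enable
;; AARCH64_FUSE_AES_AESMC, keeping AESE and AESMC adjacent.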
8784
8785 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8786 ;; and enforce the register dependency without scheduling or register
8787 ;; allocation messing up the order or introducing moves in between.
8788 ;; Mash the two together during combine.
8789
8790 (define_insn "*aarch64_crypto_aesd_fused"
8791 [(set (match_operand:V16QI 0 "register_operand" "=w")
8792 (unspec:V16QI
8793 [(unspec:V16QI
8794 [(xor:V16QI
8795 (match_operand:V16QI 1 "register_operand" "%0")
8796 (match_operand:V16QI 2 "register_operand" "w"))]
8797 UNSPEC_AESD)]
8798 UNSPEC_AESIMC))]
8799 "TARGET_AES
8800 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8801 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8802 [(set_attr "type" "crypto_aese")
8803 (set_attr "length" "8")]
8804 )
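
;; The decryption analogue of the sketch above,
;; vaesimcq_u8 (vaesdq_u8 (data, key)), combines into this pattern.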
8805
8806 ;; sha1
8807
8808 (define_insn "aarch64_crypto_sha1hsi"
8809 [(set (match_operand:SI 0 "register_operand" "=w")
8810 (unspec:SI [(match_operand:SI 1
8811 "register_operand" "w")]
8812 UNSPEC_SHA1H))]
8813 "TARGET_SHA2"
8814 "sha1h\\t%s0, %s1"
8815 [(set_attr "type" "crypto_sha1_fast")]
8816 )
8817
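;; The two V4SI patterns below describe SHA1H reading the low 32 bits of
;; the input register; that element is RTL lane 0 on little-endian but
;; lane 3 on big-endian, hence the separate variants.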
8818 (define_insn "aarch64_crypto_sha1hv4si"
8819 [(set (match_operand:SI 0 "register_operand" "=w")
8820 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8821 (parallel [(const_int 0)]))]
8822 UNSPEC_SHA1H))]
8823 "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8824 "sha1h\\t%s0, %s1"
8825 [(set_attr "type" "crypto_sha1_fast")]
8826 )
8827
8828 (define_insn "aarch64_be_crypto_sha1hv4si"
8829 [(set (match_operand:SI 0 "register_operand" "=w")
8830 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8831 (parallel [(const_int 3)]))]
8832 UNSPEC_SHA1H))]
8833 "TARGET_SHA2 && BYTES_BIG_ENDIAN"
8834 "sha1h\\t%s0, %s1"
8835 [(set_attr "type" "crypto_sha1_fast")]
8836 )
8837
8838 (define_insn "aarch64_crypto_sha1su1v4si"
8839 [(set (match_operand:V4SI 0 "register_operand" "=w")
8840 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8841 (match_operand:V4SI 2 "register_operand" "w")]
8842 UNSPEC_SHA1SU1))]
8843 "TARGET_SHA2"
8844 "sha1su1\\t%0.4s, %2.4s"
8845 [(set_attr "type" "crypto_sha1_fast")]
8846 )
8847
8848 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8849 [(set (match_operand:V4SI 0 "register_operand" "=w")
8850 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8851 (match_operand:SI 2 "register_operand" "w")
8852 (match_operand:V4SI 3 "register_operand" "w")]
8853 CRYPTO_SHA1))]
8854 "TARGET_SHA2"
8855 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8856 [(set_attr "type" "crypto_sha1_slow")]
8857 )
8858
8859 (define_insn "aarch64_crypto_sha1su0v4si"
8860 [(set (match_operand:V4SI 0 "register_operand" "=w")
8861 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8862 (match_operand:V4SI 2 "register_operand" "w")
8863 (match_operand:V4SI 3 "register_operand" "w")]
8864 UNSPEC_SHA1SU0))]
8865 "TARGET_SHA2"
8866 "sha1su0\\t%0.4s, %2.4s, %3.4s"
8867 [(set_attr "type" "crypto_sha1_xor")]
8868 )
8869
8870 ;; sha256
8871
8872 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8873 [(set (match_operand:V4SI 0 "register_operand" "=w")
8874 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8875 (match_operand:V4SI 2 "register_operand" "w")
8876 (match_operand:V4SI 3 "register_operand" "w")]
8877 CRYPTO_SHA256))]
8878 "TARGET_SHA2"
8879 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8880 [(set_attr "type" "crypto_sha256_slow")]
8881 )
8882
8883 (define_insn "aarch64_crypto_sha256su0v4si"
8884 [(set (match_operand:V4SI 0 "register_operand" "=w")
8885 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8886 (match_operand:V4SI 2 "register_operand" "w")]
8887 UNSPEC_SHA256SU0))]
8888 "TARGET_SHA2"
8889 "sha256su0\\t%0.4s, %2.4s"
8890 [(set_attr "type" "crypto_sha256_fast")]
8891 )
8892
8893 (define_insn "aarch64_crypto_sha256su1v4si"
8894 [(set (match_operand:V4SI 0 "register_operand" "=w")
8895 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8896 (match_operand:V4SI 2 "register_operand" "w")
8897 (match_operand:V4SI 3 "register_operand" "w")]
8898 UNSPEC_SHA256SU1))]
8899 "TARGET_SHA2"
8900 "sha256su1\\t%0.4s, %2.4s, %3.4s"
8901 [(set_attr "type" "crypto_sha256_slow")]
8902 )
8903
8904 ;; sha512
8905
8906 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8907 [(set (match_operand:V2DI 0 "register_operand" "=w")
8908 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8909 (match_operand:V2DI 2 "register_operand" "w")
8910 (match_operand:V2DI 3 "register_operand" "w")]
8911 CRYPTO_SHA512))]
8912 "TARGET_SHA3"
8913 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8914 [(set_attr "type" "crypto_sha512")]
8915 )
8916
8917 (define_insn "aarch64_crypto_sha512su0qv2di"
8918 [(set (match_operand:V2DI 0 "register_operand" "=w")
8919 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8920 (match_operand:V2DI 2 "register_operand" "w")]
8921 UNSPEC_SHA512SU0))]
8922 "TARGET_SHA3"
8923 "sha512su0\\t%0.2d, %2.2d"
8924 [(set_attr "type" "crypto_sha512")]
8925 )
8926
8927 (define_insn "aarch64_crypto_sha512su1qv2di"
8928 [(set (match_operand:V2DI 0 "register_operand" "=w")
8929 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8930 (match_operand:V2DI 2 "register_operand" "w")
8931 (match_operand:V2DI 3 "register_operand" "w")]
8932 UNSPEC_SHA512SU1))]
8933 "TARGET_SHA3"
8934 "sha512su1\\t%0.2d, %2.2d, %3.2d"
8935 [(set_attr "type" "crypto_sha512")]
8936 )
8937
8938 ;; sha3
8939
8940 (define_insn "eor3q<mode>4"
8941 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8942 (xor:VQ_I
8943 (xor:VQ_I
8944 (match_operand:VQ_I 2 "register_operand" "w")
8945 (match_operand:VQ_I 3 "register_operand" "w"))
8946 (match_operand:VQ_I 1 "register_operand" "w")))]
8947 "TARGET_SHA3"
8948 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
8949 [(set_attr "type" "crypto_sha3")]
8950 )
8951
8952 (define_insn "aarch64_rax1qv2di"
8953 [(set (match_operand:V2DI 0 "register_operand" "=w")
8954 (xor:V2DI
8955 (rotate:V2DI
8956 (match_operand:V2DI 2 "register_operand" "w")
8957 (const_int 1))
8958 (match_operand:V2DI 1 "register_operand" "w")))]
8959 "TARGET_SHA3"
8960 "rax1\\t%0.2d, %1.2d, %2.2d"
8961 [(set_attr "type" "crypto_sha3")]
8962 )
8963
8964 (define_insn "aarch64_xarqv2di"
8965 [(set (match_operand:V2DI 0 "register_operand" "=w")
8966 (rotatert:V2DI
8967 (xor:V2DI
8968 (match_operand:V2DI 1 "register_operand" "%w")
8969 (match_operand:V2DI 2 "register_operand" "w"))
8970 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
8971 "TARGET_SHA3"
8972 "xar\\t%0.2d, %1.2d, %2.2d, %3"
8973 [(set_attr "type" "crypto_sha3")]
8974 )
8975
8976 (define_insn "bcaxq<mode>4"
8977 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8978 (xor:VQ_I
8979 (and:VQ_I
8980 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
8981 (match_operand:VQ_I 2 "register_operand" "w"))
8982 (match_operand:VQ_I 1 "register_operand" "w")))]
8983 "TARGET_SHA3"
8984 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
8985 [(set_attr "type" "crypto_sha3")]
8986 )
8987
8988 ;; SM3
8989
8990 (define_insn "aarch64_sm3ss1qv4si"
8991 [(set (match_operand:V4SI 0 "register_operand" "=w")
8992 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8993 (match_operand:V4SI 2 "register_operand" "w")
8994 (match_operand:V4SI 3 "register_operand" "w")]
8995 UNSPEC_SM3SS1))]
8996 "TARGET_SM4"
8997 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
8998 [(set_attr "type" "crypto_sm3")]
8999 )
9000
9001
9002 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9003 [(set (match_operand:V4SI 0 "register_operand" "=w")
9004 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9005 (match_operand:V4SI 2 "register_operand" "w")
9006 (match_operand:V4SI 3 "register_operand" "w")
9007 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
9008 CRYPTO_SM3TT))]
9009 "TARGET_SM4"
9010 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
9011 [(set_attr "type" "crypto_sm3")]
9012 )
9013
9014 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9015 [(set (match_operand:V4SI 0 "register_operand" "=w")
9016 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9017 (match_operand:V4SI 2 "register_operand" "w")
9018 (match_operand:V4SI 3 "register_operand" "w")]
9019 CRYPTO_SM3PART))]
9020 "TARGET_SM4"
9021 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
9022 [(set_attr "type" "crypto_sm3")]
9023 )
9024
9025 ;; SM4
9026
9027 (define_insn "aarch64_sm4eqv4si"
9028 [(set (match_operand:V4SI 0 "register_operand" "=w")
9029 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9030 (match_operand:V4SI 2 "register_operand" "w")]
9031 UNSPEC_SM4E))]
9032 "TARGET_SM4"
9033 "sm4e\\t%0.4s, %2.4s"
9034 [(set_attr "type" "crypto_sm4")]
9035 )
9036
9037 (define_insn "aarch64_sm4ekeyqv4si"
9038 [(set (match_operand:V4SI 0 "register_operand" "=w")
9039 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9040 (match_operand:V4SI 2 "register_operand" "w")]
9041 UNSPEC_SM4EKEY))]
9042 "TARGET_SM4"
9043 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
9044 [(set_attr "type" "crypto_sm4")]
9045 )
9046
9047 ;; fp16fml
9048
9049 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9050 [(set (match_operand:VDQSF 0 "register_operand")
9051 (unspec:VDQSF
9052 [(match_operand:VDQSF 1 "register_operand")
9053 (match_operand:<VFMLA_W> 2 "register_operand")
9054 (match_operand:<VFMLA_W> 3 "register_operand")]
9055 VFMLA16_LOW))]
9056 "TARGET_F16FML"
9057 {
9058 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9059 <nunits> * 2, false);
9060 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9061 <nunits> * 2, false);
9062
9063 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
9064 operands[1],
9065 operands[2],
9066 operands[3],
9067 p1, p2));
9068 DONE;
9069
9070 })
9071
9072 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9073 [(set (match_operand:VDQSF 0 "register_operand")
9074 (unspec:VDQSF
9075 [(match_operand:VDQSF 1 "register_operand")
9076 (match_operand:<VFMLA_W> 2 "register_operand")
9077 (match_operand:<VFMLA_W> 3 "register_operand")]
9078 VFMLA16_HIGH))]
9079 "TARGET_F16FML"
9080 {
9081 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9082 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9083
9084 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
9085 operands[1],
9086 operands[2],
9087 operands[3],
9088 p1, p2));
9089 DONE;
9090 })
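
;; A hedged note on the intrinsic mapping: these expanders back the
;; vfmlal_low_f16 / vfmlal_high_f16 intrinsic families (and the fmlsl
;; variants). The "_low" forms widen the bottom half of the 16-bit input
;; elements and the "_high" forms (FMLAL2/FMLSL2) the top half, selected
;; here by the vect_par_cnst_lo_half / vect_par_cnst_hi_half parallels.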
9091
9092 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9093 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9094 (fma:VDQSF
9095 (float_extend:VDQSF
9096 (vec_select:<VFMLA_SEL_W>
9097 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9098 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
9099 (float_extend:VDQSF
9100 (vec_select:<VFMLA_SEL_W>
9101 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9102 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9103 (match_operand:VDQSF 1 "register_operand" "0")))]
9104 "TARGET_F16FML"
9105 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9106 [(set_attr "type" "neon_fp_mul_s")]
9107 )
9108
9109 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9110 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9111 (fma:VDQSF
9112 (float_extend:VDQSF
9113 (neg:<VFMLA_SEL_W>
9114 (vec_select:<VFMLA_SEL_W>
9115 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9116 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
9117 (float_extend:VDQSF
9118 (vec_select:<VFMLA_SEL_W>
9119 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9120 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9121 (match_operand:VDQSF 1 "register_operand" "0")))]
9122 "TARGET_F16FML"
9123 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9124 [(set_attr "type" "neon_fp_mul_s")]
9125 )
9126
9127 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9128 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9129 (fma:VDQSF
9130 (float_extend:VDQSF
9131 (vec_select:<VFMLA_SEL_W>
9132 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9133 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
9134 (float_extend:VDQSF
9135 (vec_select:<VFMLA_SEL_W>
9136 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9137 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9138 (match_operand:VDQSF 1 "register_operand" "0")))]
9139 "TARGET_F16FML"
9140 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9141 [(set_attr "type" "neon_fp_mul_s")]
9142 )
9143
9144 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9145 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9146 (fma:VDQSF
9147 (float_extend:VDQSF
9148 (neg:<VFMLA_SEL_W>
9149 (vec_select:<VFMLA_SEL_W>
9150 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9151 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
9152 (float_extend:VDQSF
9153 (vec_select:<VFMLA_SEL_W>
9154 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9155 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9156 (match_operand:VDQSF 1 "register_operand" "0")))]
9157 "TARGET_F16FML"
9158 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9159 [(set_attr "type" "neon_fp_mul_s")]
9160 )
9161
9162 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9163 [(set (match_operand:V2SF 0 "register_operand")
9164 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9165 (match_operand:V4HF 2 "register_operand")
9166 (match_operand:V4HF 3 "register_operand")
9167 (match_operand:SI 4 "aarch64_imm2")]
9168 VFMLA16_LOW))]
9169 "TARGET_F16FML"
9170 {
9171 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9172 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9173
9174 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
9175 operands[1],
9176 operands[2],
9177 operands[3],
9178 p1, lane));
9179 DONE;
9180 }
9181 )
9182
9183 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9184 [(set (match_operand:V2SF 0 "register_operand")
9185 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9186 (match_operand:V4HF 2 "register_operand")
9187 (match_operand:V4HF 3 "register_operand")
9188 (match_operand:SI 4 "aarch64_imm2")]
9189 VFMLA16_HIGH))]
9190 "TARGET_F16FML"
9191 {
9192 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9193 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9194
9195 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
9196 operands[1],
9197 operands[2],
9198 operands[3],
9199 p1, lane));
9200 DONE;
9201 })
9202
9203 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9204 [(set (match_operand:V2SF 0 "register_operand" "=w")
9205 (fma:V2SF
9206 (float_extend:V2SF
9207 (vec_select:V2HF
9208 (match_operand:V4HF 2 "register_operand" "w")
9209 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9210 (float_extend:V2SF
9211 (vec_duplicate:V2HF
9212 (vec_select:HF
9213 (match_operand:V4HF 3 "register_operand" "x")
9214 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9215 (match_operand:V2SF 1 "register_operand" "0")))]
9216 "TARGET_F16FML"
9217 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9218 [(set_attr "type" "neon_fp_mul_s")]
9219 )
9220
9221 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9222 [(set (match_operand:V2SF 0 "register_operand" "=w")
9223 (fma:V2SF
9224 (float_extend:V2SF
9225 (neg:V2HF
9226 (vec_select:V2HF
9227 (match_operand:V4HF 2 "register_operand" "w")
9228 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9229 (float_extend:V2SF
9230 (vec_duplicate:V2HF
9231 (vec_select:HF
9232 (match_operand:V4HF 3 "register_operand" "x")
9233 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9234 (match_operand:V2SF 1 "register_operand" "0")))]
9235 "TARGET_F16FML"
9236 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9237 [(set_attr "type" "neon_fp_mul_s")]
9238 )
9239
9240 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9241 [(set (match_operand:V2SF 0 "register_operand" "=w")
9242 (fma:V2SF
9243 (float_extend:V2SF
9244 (vec_select:V2HF
9245 (match_operand:V4HF 2 "register_operand" "w")
9246 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9247 (float_extend:V2SF
9248 (vec_duplicate:V2HF
9249 (vec_select:HF
9250 (match_operand:V4HF 3 "register_operand" "x")
9251 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9252 (match_operand:V2SF 1 "register_operand" "0")))]
9253 "TARGET_F16FML"
9254 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9255 [(set_attr "type" "neon_fp_mul_s")]
9256 )
9257
9258 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9259 [(set (match_operand:V2SF 0 "register_operand" "=w")
9260 (fma:V2SF
9261 (float_extend:V2SF
9262 (neg:V2HF
9263 (vec_select:V2HF
9264 (match_operand:V4HF 2 "register_operand" "w")
9265 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9266 (float_extend:V2SF
9267 (vec_duplicate:V2HF
9268 (vec_select:HF
9269 (match_operand:V4HF 3 "register_operand" "x")
9270 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9271 (match_operand:V2SF 1 "register_operand" "0")))]
9272 "TARGET_F16FML"
9273 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9274 [(set_attr "type" "neon_fp_mul_s")]
9275 )
9276
9277 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
9278 [(set (match_operand:V4SF 0 "register_operand")
9279 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9280 (match_operand:V8HF 2 "register_operand")
9281 (match_operand:V8HF 3 "register_operand")
9282 (match_operand:SI 4 "aarch64_lane_imm3")]
9283 VFMLA16_LOW))]
9284 "TARGET_F16FML"
9285 {
9286 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9287 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9288
9289 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
9290 operands[1],
9291 operands[2],
9292 operands[3],
9293 p1, lane));
9294 DONE;
9295 })
9296
9297 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
9298 [(set (match_operand:V4SF 0 "register_operand")
9299 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9300 (match_operand:V8HF 2 "register_operand")
9301 (match_operand:V8HF 3 "register_operand")
9302 (match_operand:SI 4 "aarch64_lane_imm3")]
9303 VFMLA16_HIGH))]
9304 "TARGET_F16FML"
9305 {
9306 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9307 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9308
9309 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
9310 operands[1],
9311 operands[2],
9312 operands[3],
9313 p1, lane));
9314 DONE;
9315 })
9316
9317 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
9318 [(set (match_operand:V4SF 0 "register_operand" "=w")
9319 (fma:V4SF
9320 (float_extend:V4SF
9321 (vec_select:V4HF
9322 (match_operand:V8HF 2 "register_operand" "w")
9323 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9324 (float_extend:V4SF
9325 (vec_duplicate:V4HF
9326 (vec_select:HF
9327 (match_operand:V8HF 3 "register_operand" "x")
9328 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9329 (match_operand:V4SF 1 "register_operand" "0")))]
9330 "TARGET_F16FML"
9331 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9332 [(set_attr "type" "neon_fp_mul_s")]
9333 )
9334
9335 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
9336 [(set (match_operand:V4SF 0 "register_operand" "=w")
9337 (fma:V4SF
9338 (float_extend:V4SF
9339 (neg:V4HF
9340 (vec_select:V4HF
9341 (match_operand:V8HF 2 "register_operand" "w")
9342 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9343 (float_extend:V4SF
9344 (vec_duplicate:V4HF
9345 (vec_select:HF
9346 (match_operand:V8HF 3 "register_operand" "x")
9347 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9348 (match_operand:V4SF 1 "register_operand" "0")))]
9349 "TARGET_F16FML"
9350 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9351 [(set_attr "type" "neon_fp_mul_s")]
9352 )
9353
9354 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
9355 [(set (match_operand:V4SF 0 "register_operand" "=w")
9356 (fma:V4SF
9357 (float_extend:V4SF
9358 (vec_select:V4HF
9359 (match_operand:V8HF 2 "register_operand" "w")
9360 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9361 (float_extend:V4SF
9362 (vec_duplicate:V4HF
9363 (vec_select:HF
9364 (match_operand:V8HF 3 "register_operand" "x")
9365 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9366 (match_operand:V4SF 1 "register_operand" "0")))]
9367 "TARGET_F16FML"
9368 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9369 [(set_attr "type" "neon_fp_mul_s")]
9370 )
9371
9372 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
9373 [(set (match_operand:V4SF 0 "register_operand" "=w")
9374 (fma:V4SF
9375 (float_extend:V4SF
9376 (neg:V4HF
9377 (vec_select:V4HF
9378 (match_operand:V8HF 2 "register_operand" "w")
9379 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9380 (float_extend:V4SF
9381 (vec_duplicate:V4HF
9382 (vec_select:HF
9383 (match_operand:V8HF 3 "register_operand" "x")
9384 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9385 (match_operand:V4SF 1 "register_operand" "0")))]
9386 "TARGET_F16FML"
9387 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9388 [(set_attr "type" "neon_fp_mul_s")]
9389 )
9390
9391 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
9392 [(set (match_operand:V2SF 0 "register_operand")
9393 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9394 (match_operand:V4HF 2 "register_operand")
9395 (match_operand:V8HF 3 "register_operand")
9396 (match_operand:SI 4 "aarch64_lane_imm3")]
9397 VFMLA16_LOW))]
9398 "TARGET_F16FML"
9399 {
9400 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9401 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9402
9403 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
9404 operands[1],
9405 operands[2],
9406 operands[3],
9407 p1, lane));
9408 DONE;
9409
9410 })
9411
9412 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
9413 [(set (match_operand:V2SF 0 "register_operand")
9414 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9415 (match_operand:V4HF 2 "register_operand")
9416 (match_operand:V8HF 3 "register_operand")
9417 (match_operand:SI 4 "aarch64_lane_imm3")]
9418 VFMLA16_HIGH))]
9419 "TARGET_F16FML"
9420 {
9421 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9422 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9423
9424 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
9425 operands[1],
9426 operands[2],
9427 operands[3],
9428 p1, lane));
9429 DONE;
9430
9431 })
9432
9433 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
9434 [(set (match_operand:V2SF 0 "register_operand" "=w")
9435 (fma:V2SF
9436 (float_extend:V2SF
9437 (vec_select:V2HF
9438 (match_operand:V4HF 2 "register_operand" "w")
9439 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9440 (float_extend:V2SF
9441 (vec_duplicate:V2HF
9442 (vec_select:HF
9443 (match_operand:V8HF 3 "register_operand" "x")
9444 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9445 (match_operand:V2SF 1 "register_operand" "0")))]
9446 "TARGET_F16FML"
9447 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9448 [(set_attr "type" "neon_fp_mul_s")]
9449 )
9450
9451 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
9452 [(set (match_operand:V2SF 0 "register_operand" "=w")
9453 (fma:V2SF
9454 (float_extend:V2SF
9455 (neg:V2HF
9456 (vec_select:V2HF
9457 (match_operand:V4HF 2 "register_operand" "w")
9458 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9459 (float_extend:V2SF
9460 (vec_duplicate:V2HF
9461 (vec_select:HF
9462 (match_operand:V8HF 3 "register_operand" "x")
9463 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9464 (match_operand:V2SF 1 "register_operand" "0")))]
9465 "TARGET_F16FML"
9466 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9467 [(set_attr "type" "neon_fp_mul_s")]
9468 )
9469
9470 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
9471 [(set (match_operand:V2SF 0 "register_operand" "=w")
9472 (fma:V2SF
9473 (float_extend:V2SF
9474 (vec_select:V2HF
9475 (match_operand:V4HF 2 "register_operand" "w")
9476 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9477 (float_extend:V2SF
9478 (vec_duplicate:V2HF
9479 (vec_select:HF
9480 (match_operand:V8HF 3 "register_operand" "x")
9481 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9482 (match_operand:V2SF 1 "register_operand" "0")))]
9483 "TARGET_F16FML"
9484 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9485 [(set_attr "type" "neon_fp_mul_s")]
9486 )
9487
9488 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
9489 [(set (match_operand:V2SF 0 "register_operand" "=w")
9490 (fma:V2SF
9491 (float_extend:V2SF
9492 (neg:V2HF
9493 (vec_select:V2HF
9494 (match_operand:V4HF 2 "register_operand" "w")
9495 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9496 (float_extend:V2SF
9497 (vec_duplicate:V2HF
9498 (vec_select:HF
9499 (match_operand:V8HF 3 "register_operand" "x")
9500 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9501 (match_operand:V2SF 1 "register_operand" "0")))]
9502 "TARGET_F16FML"
9503 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9504 [(set_attr "type" "neon_fp_mul_s")]
9505 )
9506
9507 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
9508 [(set (match_operand:V4SF 0 "register_operand")
9509 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9510 (match_operand:V8HF 2 "register_operand")
9511 (match_operand:V4HF 3 "register_operand")
9512 (match_operand:SI 4 "aarch64_imm2")]
9513 VFMLA16_LOW))]
9514 "TARGET_F16FML"
9515 {
9516 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9517 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9518
9519 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
9520 operands[1],
9521 operands[2],
9522 operands[3],
9523 p1, lane));
9524 DONE;
9525 })
9526
9527 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
9528 [(set (match_operand:V4SF 0 "register_operand")
9529 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9530 (match_operand:V8HF 2 "register_operand")
9531 (match_operand:V4HF 3 "register_operand")
9532 (match_operand:SI 4 "aarch64_imm2")]
9533 VFMLA16_HIGH))]
9534 "TARGET_F16FML"
9535 {
9536 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9537 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9538
9539 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
9540 operands[1],
9541 operands[2],
9542 operands[3],
9543 p1, lane));
9544 DONE;
9545 })
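
;; In the two expanders above, aarch64_simd_vect_par_cnst_half builds the
;; parallel that the insn's vect_par_cnst_lo_half/vect_par_cnst_hi_half
;; predicate later checks; for the low half of V8HF on little-endian this
;; should be
;;
;;   (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])
;;
;; with the two halves swapped on big-endian.  aarch64_endian_lane_rtx
;; likewise remaps the architectural lane number in operand 4 so that the
;; .h[%5] index printed by the matching insn is correct for either
;; endianness.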
9546
9547 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
9548 [(set (match_operand:V4SF 0 "register_operand" "=w")
9549 (fma:V4SF
9550 (float_extend:V4SF
9551 (vec_select:V4HF
9552 (match_operand:V8HF 2 "register_operand" "w")
9553 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9554 (float_extend:V4SF
9555 (vec_duplicate:V4HF
9556 (vec_select:HF
9557 (match_operand:V4HF 3 "register_operand" "x")
9558 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9559 (match_operand:V4SF 1 "register_operand" "0")))]
9560 "TARGET_F16FML"
9561 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9562 [(set_attr "type" "neon_fp_mul_s")]
9563 )
9564
9565 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
9566 [(set (match_operand:V4SF 0 "register_operand" "=w")
9567 (fma:V4SF
9568 (float_extend:V4SF
9569 (neg:V4HF
9570 (vec_select:V4HF
9571 (match_operand:V8HF 2 "register_operand" "w")
9572 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9573 (float_extend:V4SF
9574 (vec_duplicate:V4HF
9575 (vec_select:HF
9576 (match_operand:V4HF 3 "register_operand" "x")
9577 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9578 (match_operand:V4SF 1 "register_operand" "0")))]
9579 "TARGET_F16FML"
9580 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9581 [(set_attr "type" "neon_fp_mul_s")]
9582 )
9583
9584 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
9585 [(set (match_operand:V4SF 0 "register_operand" "=w")
9586 (fma:V4SF
9587 (float_extend:V4SF
9588 (vec_select:V4HF
9589 (match_operand:V8HF 2 "register_operand" "w")
9590 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9591 (float_extend:V4SF
9592 (vec_duplicate:V4HF
9593 (vec_select:HF
9594 (match_operand:V4HF 3 "register_operand" "x")
9595 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9596 (match_operand:V4SF 1 "register_operand" "0")))]
9597 "TARGET_F16FML"
9598 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9599 [(set_attr "type" "neon_fp_mul_s")]
9600 )
9601
9602 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
9603 [(set (match_operand:V4SF 0 "register_operand" "=w")
9604 (fma:V4SF
9605 (float_extend:V4SF
9606 (neg:V4HF
9607 (vec_select:V4HF
9608 (match_operand:V8HF 2 "register_operand" "w")
9609 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9610 (float_extend:V4SF
9611 (vec_duplicate:V4HF
9612 (vec_select:HF
9613 (match_operand:V4HF 3 "register_operand" "x")
9614 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9615 (match_operand:V4SF 1 "register_operand" "0")))]
9616 "TARGET_F16FML"
9617 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9618 [(set_attr "type" "neon_fp_mul_s")]
9619 )
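
;; These q_lane patterns correspond to the ACLE intrinsics
;; vfmlalq_lane_low_f16, vfmlslq_lane_low_f16, vfmlalq_lane_high_f16 and
;; vfmlslq_lane_high_f16.  A minimal sketch (assuming
;; -march=armv8.2-a+fp16fml):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   acc_q_low (float32x4_t r, float16x8_t a, float16x4_t b)
;;   {
;;     /* fmlal v0.4s, v1.4h, v2.h[3]: widen the low four halves of A,
;;        multiply each by lane 3 of B and accumulate into R.  */
;;     return vfmlalq_lane_low_f16 (r, a, b, 3);
;;   }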
9620
9621 ;; pmull
9622
9623 (define_insn "aarch64_crypto_pmulldi"
9624 [(set (match_operand:TI 0 "register_operand" "=w")
9625 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
9626 (match_operand:DI 2 "register_operand" "w")]
9627 UNSPEC_PMULL))]
9628 "TARGET_AES"
9629 "pmull\\t%0.1q, %1.1d, %2.1d"
9630 [(set_attr "type" "crypto_pmull")]
9631 )
9632
9633 (define_insn "aarch64_crypto_pmullv2di"
9634 [(set (match_operand:TI 0 "register_operand" "=w")
9635 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
9636 (match_operand:V2DI 2 "register_operand" "w")]
9637 UNSPEC_PMULL2))]
9638 "TARGET_AES"
9639 "pmull2\\t%0.1q, %1.2d, %2.2d"
9640 [(set_attr "type" "crypto_pmull")]
9641 )
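
;; The two patterns above back the ACLE carry-less multiply intrinsics.
;; A minimal sketch (assuming -march=armv8-a+aes):
;;
;;   #include <arm_neon.h>
;;
;;   poly128_t
;;   clmul_lo (poly64x2_t a, poly64x2_t b)
;;   {
;;     /* pmull v0.1q, v1.1d, v2.1d on the low halves ...  */
;;     return vmull_p64 (vgetq_lane_p64 (a, 0), vgetq_lane_p64 (b, 0));
;;   }
;;
;;   poly128_t
;;   clmul_hi (poly64x2_t a, poly64x2_t b)
;;   {
;;     /* ... and pmull2 v0.1q, v1.2d, v2.2d on the high halves.  */
;;     return vmull_high_p64 (a, b);
;;   }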
9642
9643 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
9644 (define_insn "<optab><Vnarrowq><mode>2"
9645 [(set (match_operand:VQN 0 "register_operand" "=w")
9646 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9647 "TARGET_SIMD"
9648 "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
9649 [(set_attr "type" "neon_shift_imm_long")]
9650 )
9651
9652 (define_expand "aarch64_<su>xtl<mode>"
9653 [(set (match_operand:VQN 0 "register_operand" "=w")
9654 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9655 "TARGET_SIMD"
9656 ""
9657 )
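
;; The extend insn serves the vectorizer's widening conversions, while
;; the expander above gives the intrinsics a single-mode entry point; a
;; minimal sketch of C code that should match it:
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   widen (int16x4_t a)
;;   {
;;     return vmovl_s16 (a);    /* sxtl v0.4s, v1.4h */
;;   }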
9658
9659 ;; Truncate a 128-bit integer vector to a 64-bit vector.
9660 (define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
9661 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9662 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9663 "TARGET_SIMD"
9664 "xtn\t%0.<Vntype>, %1.<Vtype>"
9665 [(set_attr "type" "neon_move_narrow_q")]
9666 )
9667
9668 ;; Expander for the intrinsic, which takes only one mode, unlike the
9669 ;; two-mode trunc optab.
9670 (define_expand "aarch64_xtn<mode>"
9671 [(set (match_operand:<VNARROWQ> 0 "register_operand")
9672 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
9673 "TARGET_SIMD"
9674 {}
9675 )
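
;; A minimal sketch of C code that should match the trunc pattern via the
;; intrinsic expander (or via the two-mode optab when vectorizing):
;;
;;   #include <arm_neon.h>
;;
;;   int16x4_t
;;   narrow (int32x4_t a)
;;   {
;;     return vmovn_s32 (a);    /* xtn v0.4h, v1.4s */
;;   }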
9676
9677 (define_insn "aarch64_bfdot<mode>"
9678 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9679 (plus:VDQSF
9680 (unspec:VDQSF
9681 [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9682 (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9683 UNSPEC_BFDOT)
9684 (match_operand:VDQSF 1 "register_operand" "0")))]
9685 "TARGET_BF16_SIMD"
9686 "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9687 [(set_attr "type" "neon_dot<q>")]
9688 )
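
;; BFDOT accumulates a 2-way bfloat16 dot product into each float lane.
;; A minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   bfdot (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
;;   {
;;     /* r[i] += a[2i] * b[2i] + a[2i+1] * b[2i+1], computed in float.  */
;;     return vbfdot_f32 (r, a, b);
;;   }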
9689
9690 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9691 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9692 (plus:VDQSF
9693 (unspec:VDQSF
9694 [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9695 (match_operand:VBF 3 "register_operand" "w")
9696 (match_operand:SI 4 "const_int_operand" "n")]
9697 UNSPEC_BFDOT)
9698 (match_operand:VDQSF 1 "register_operand" "0")))]
9699 "TARGET_BF16_SIMD"
9700 {
9701 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9702 int lane = INTVAL (operands[4]);
9703 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9704 return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9705 }
9706 [(set_attr "type" "neon_dot<VDQSF:q>")]
9707 )
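
;; The lane form selects one *pair* of bfloat16 elements from operand 3,
;; which is why the ENDIAN_LANE_N call above divides the element count by
;; two and why the operand is printed as .2h[%4].  A minimal sketch
;; (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   bfdot_lane1 (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
;;   {
;;     /* Every accumulator lane uses the pair b[2], b[3].  */
;;     return vbfdot_lane_f32 (r, a, b, 1);
;;   }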
9708
9709 ;; vget_low/high_bf16
9710 (define_expand "aarch64_vget_lo_halfv8bf"
9711 [(match_operand:V4BF 0 "register_operand")
9712 (match_operand:V8BF 1 "register_operand")]
9713 "TARGET_BF16_SIMD"
9714 {
9715 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
9716 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9717 DONE;
9718 })
9719
9720 (define_expand "aarch64_vget_hi_halfv8bf"
9721 [(match_operand:V4BF 0 "register_operand")
9722 (match_operand:V8BF 1 "register_operand")]
9723 "TARGET_BF16_SIMD"
9724 {
9725 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
9726 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9727 DONE;
9728 })
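
;; A minimal sketch of the corresponding intrinsics (assuming
;; -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   bfloat16x4_t low  (bfloat16x8_t a) { return vget_low_bf16 (a); }
;;   bfloat16x4_t high (bfloat16x8_t a) { return vget_high_bf16 (a); }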
9729
9730 ;; bfmmla
9731 (define_insn "aarch64_bfmmlaqv4sf"
9732 [(set (match_operand:V4SF 0 "register_operand" "=w")
9733 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9734 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9735 (match_operand:V8BF 3 "register_operand" "w")]
9736 UNSPEC_BFMMLA)))]
9737 "TARGET_BF16_SIMD"
9738 "bfmmla\\t%0.4s, %2.8h, %3.8h"
9739 [(set_attr "type" "neon_fp_mla_s_q")]
9740 )
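
;; BFMMLA multiplies a 2x4 bfloat16 matrix by a 4x2 bfloat16 matrix and
;; accumulates into the 2x2 float matrix held in the destination.  A
;; minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   mmla (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
;;   {
;;     return vbfmmlaq_f32 (r, a, b);   /* bfmmla v0.4s, v1.8h, v2.8h */
;;   }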
9741
9742 ;; bfmlal<bt>
9743 (define_insn "aarch64_bfmlal<bt>v4sf"
9744 [(set (match_operand:V4SF 0 "register_operand" "=w")
9745 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9746 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9747 (match_operand:V8BF 3 "register_operand" "w")]
9748 BF_MLA)))]
9749 "TARGET_BF16_SIMD"
9750 "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9751 [(set_attr "type" "neon_fp_mla_s_q")]
9752 )
9753
9754 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9755 [(set (match_operand:V4SF 0 "register_operand" "=w")
9756 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9757 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9758 (match_operand:VBF 3 "register_operand" "x")
9759 (match_operand:SI 4 "const_int_operand" "n")]
9760 BF_MLA)))]
9761 "TARGET_BF16_SIMD"
9762 {
9763 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9764 return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9765 }
9766 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9767 )
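
;; BFMLALB/BFMLALT widen the even-numbered (bottom) or odd-numbered (top)
;; bfloat16 elements and fused-multiply-accumulate them into the float
;; accumulator; the lane forms broadcast one element of operand 3.  A
;; minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   mlalb_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b)
;;   {
;;     /* bfmlalb v0.4s, v1.8h, v2.h[2] */
;;     return vbfmlalbq_lane_f32 (r, a, b, 2);
;;   }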
9768
9769 ;; 8-bit integer matrix multiply-accumulate
9770 (define_insn "aarch64_simd_<sur>mmlav16qi"
9771 [(set (match_operand:V4SI 0 "register_operand" "=w")
9772 (plus:V4SI
9773 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9774 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9775 (match_operand:V4SI 1 "register_operand" "0")))]
9776 "TARGET_I8MM"
9777 "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9778 [(set_attr "type" "neon_mla_s_q")]
9779 )
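
;; The MATMUL iterator covers the signed, unsigned and mixed-sign forms
;; (smmla, ummla, usmmla), i.e. the vmmlaq_s32, vmmlaq_u32 and
;; vusmmlaq_s32 intrinsics.  A minimal sketch (assuming
;; -march=armv8.2-a+i8mm):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   mmla_s8 (int32x4_t r, int8x16_t a, int8x16_t b)
;;   {
;;     return vmmlaq_s32 (r, a, b);   /* smmla v0.4s, v1.16b, v2.16b */
;;   }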
9780
9781 ;; bfcvtn
9782 (define_insn "aarch64_bfcvtn<q><mode>"
9783 [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9784 (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9785 UNSPEC_BFCVTN))]
9786 "TARGET_BF16_SIMD"
9787 "bfcvtn\\t%0.4h, %1.4s"
9788 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9789 )
9790
9791 (define_insn "aarch64_bfcvtn2v8bf"
9792 [(set (match_operand:V8BF 0 "register_operand" "=w")
9793 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9794 (match_operand:V4SF 2 "register_operand" "w")]
9795 UNSPEC_BFCVTN2))]
9796 "TARGET_BF16_SIMD"
9797 "bfcvtn2\\t%0.8h, %2.4s"
9798 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9799 )
9800
9801 (define_insn "aarch64_bfcvtbf"
9802 [(set (match_operand:BF 0 "register_operand" "=w")
9803 (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9804 UNSPEC_BFCVT))]
9805 "TARGET_BF16_FP"
9806 "bfcvt\\t%h0, %s1"
9807 [(set_attr "type" "f_cvt")]
9808 )
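
;; The three BFCVT patterns above implement the narrowing conversions
;; vcvt_bf16_f32/vcvtq_low_bf16_f32, vcvtq_high_bf16_f32 and the scalar
;; vcvth_bf16_f32.  A minimal sketch (assuming -march=armv8.2-a+bf16):
;;
;;   #include <arm_neon.h>
;;
;;   bfloat16x8_t
;;   narrow_pair (float32x4_t lo, float32x4_t hi)
;;   {
;;     bfloat16x8_t t = vcvtq_low_bf16_f32 (lo);   /* bfcvtn  */
;;     return vcvtq_high_bf16_f32 (t, hi);         /* bfcvtn2 */
;;   }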
9809
9810 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
9811 (define_insn "aarch64_vbfcvt<mode>"
9812 [(set (match_operand:V4SF 0 "register_operand" "=w")
9813 (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
9814 UNSPEC_BFCVTN))]
9815 "TARGET_BF16_SIMD"
9816 "shll\\t%0.4s, %1.4h, #16"
9817 [(set_attr "type" "neon_shift_imm_long")]
9818 )
9819
9820 (define_insn "aarch64_vbfcvt_highv8bf"
9821 [(set (match_operand:V4SF 0 "register_operand" "=w")
9822 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
9823 UNSPEC_BFCVTN2))]
9824 "TARGET_BF16_SIMD"
9825 "shll2\\t%0.4s, %1.8h, #16"
9826 [(set_attr "type" "neon_shift_imm_long")]
9827 )
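
;; The shift works because bfloat16 is exactly the high half of an IEEE
;; binary32 encoding: widening each 16-bit lane and shifting it left by
;; 16 reproduces the float bit pattern with no rounding.  The scalar
;; aarch64_bfcvtsf below relies on the same identity.  A scalar C model
;; of what SHLL #16 computes per lane:
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;
;;   float
;;   bf16_to_f32 (uint16_t bf)
;;   {
;;     uint32_t bits = (uint32_t) bf << 16;
;;     float f;
;;     memcpy (&f, &bits, sizeof f);   /* bit-cast, no conversion */
;;     return f;
;;   }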
9828
9829 (define_insn "aarch64_bfcvtsf"
9830 [(set (match_operand:SF 0 "register_operand" "=w")
9831 (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
9832 UNSPEC_BFCVT))]
9833 "TARGET_BF16_FP"
9834 "shl\\t%d0, %d1, #16"
9835 [(set_attr "type" "neon_shift_imm")]
9836 )
9837 )