1 ;; VSX patterns.
2 ;; Copyright (C) 2009-2023 Free Software Foundation, Inc.
3 ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
4
5 ;; This file is part of GCC.
6
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published
9 ;; by the Free Software Foundation; either version 3, or (at your
10 ;; option) any later version.
11
12 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
13 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 ;; License for more details.
16
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 ;; Iterator for comparison types
22 (define_code_iterator CMP_TEST [eq lt gt unordered])
23
24 ;; Mode attribute for vector floate and floato conversions
25 (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
26
27 ;; Iterator for both scalar and vector floating point types supported by VSX
28 (define_mode_iterator VSX_B [DF V4SF V2DF])
29
30 ;; Iterator for the 2 64-bit vector types
31 (define_mode_iterator VSX_D [V2DF V2DI])
32
33 ;; Mode iterator to handle swapping words on little endian for the 128-bit
34 ;; types that go in a single vector register.
35 (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
36 (TF "FLOAT128_VECTOR_P (TFmode)")
37 TI
38 V1TI])
39
40 ;; Iterator for the 2 32-bit vector types
41 (define_mode_iterator VSX_W [V4SF V4SI])
42
43 ;; Iterator for the DF types
44 (define_mode_iterator VSX_DF [V2DF DF])
45
46 ;; Iterator for vector floating point types supported by VSX
47 (define_mode_iterator VSX_F [V4SF V2DF])
48
49 ;; Iterator for logical types supported by VSX
50 (define_mode_iterator VSX_L [V16QI
51 V8HI
52 V4SI
53 V2DI
54 V4SF
55 V2DF
56 V1TI
57 TI
58 (KF "FLOAT128_VECTOR_P (KFmode)")
59 (TF "FLOAT128_VECTOR_P (TFmode)")])
60
61 ;; Iterator for memory moves.
62 (define_mode_iterator VSX_M [V16QI
63 V8HI
64 V4SI
65 V2DI
66 V4SF
67 V2DF
68 V1TI
69 (KF "FLOAT128_VECTOR_P (KFmode)")
70 (TF "FLOAT128_VECTOR_P (TFmode)")
71 TI])
72
73 (define_mode_attr VSX_XXBR [(V8HI "h")
74 (V4SI "w")
75 (V4SF "w")
76 (V2DF "d")
77 (V2DI "d")
78 (V1TI "q")])
79
80 ;; Map into the appropriate load/store name based on the type
81 (define_mode_attr VSm [(V16QI "vw4")
82 (V8HI "vw4")
83 (V4SI "vw4")
84 (V4SF "vw4")
85 (V2DF "vd2")
86 (V2DI "vd2")
87 (DF "d")
88 (TF "vd2")
89 (KF "vd2")
90 (V1TI "vd2")
91 (TI "vd2")])
92
93 ;; Map the register class used
94 (define_mode_attr VSr [(V16QI "v")
95 (V8HI "v")
96 (V4SI "v")
97 (V4SF "wa")
98 (V2DI "wa")
99 (V2DF "wa")
100 (DI "wa")
101 (DF "wa")
102 (SF "wa")
103 (TF "wa")
104 (KF "wa")
105 (V1TI "v")
106 (TI "wa")])
107
108 ;; What value we need in the "isa" field, to make the IEEE QP float work.
109 (define_mode_attr VSisa [(V16QI "*")
110 (V8HI "*")
111 (V4SI "*")
112 (V4SF "*")
113 (V2DI "*")
114 (V2DF "*")
115 (DI "*")
116 (DF "*")
117 (SF "*")
118 (V1TI "*")
119 (TI "*")
120 (TF "p9tf")
121 (KF "p9kf")])
122
123 ;; A mode attribute to disparage use of GPR registers, except for scalar
124 ;; integer modes.
125 (define_mode_attr ??r [(V16QI "??r")
126 (V8HI "??r")
127 (V4SI "??r")
128 (V4SF "??r")
129 (V2DI "??r")
130 (V2DF "??r")
131 (V1TI "??r")
132 (KF "??r")
133 (TF "??r")
134 (TI "r")])
135
136 ;; A mode attribute used for 128-bit constant values.
137 (define_mode_attr nW [(V16QI "W")
138 (V8HI "W")
139 (V4SI "W")
140 (V4SF "W")
141 (V2DI "W")
142 (V2DF "W")
143 (V1TI "W")
144 (KF "W")
145 (TF "W")
146 (TI "n")])
147
148 ;; Same size integer type for floating point data
149 (define_mode_attr VSi [(V4SF "v4si")
150 (V2DF "v2di")
151 (DF "di")])
152
153 (define_mode_attr VSI [(V4SF "V4SI")
154 (V2DF "V2DI")
155 (DF "DI")])
156
157 ;; Word size for same size conversion
158 (define_mode_attr VSc [(V4SF "w")
159 (V2DF "d")
160 (DF "d")])
161
162 ;; Map into either s or v, depending on whether this is a scalar or vector
163 ;; operation
164 (define_mode_attr VSv [(V16QI "v")
165 (V8HI "v")
166 (V4SI "v")
167 (V4SF "v")
168 (V2DI "v")
169 (V2DF "v")
170 (V1TI "v")
171 (DF "s")
172 (KF "v")])
173
174 ;; Appropriate type for add ops (and other simple FP ops)
175 (define_mode_attr VStype_simple [(V2DF "vecdouble")
176 (V4SF "vecfloat")
177 (DF "fp")])
178
179 ;; Appropriate type for multiply ops
180 (define_mode_attr VStype_mul [(V2DF "vecdouble")
181 (V4SF "vecfloat")
182 (DF "dmul")])
183
184 ;; Appropriate type for divide ops.
185 (define_mode_attr VStype_div [(V2DF "vecdiv")
186 (V4SF "vecfdiv")
187 (DF "ddiv")])
188
189 ;; Map to a double-sized vector mode
190 (define_mode_attr VS_double [(V4SI "V8SI")
191 (V4SF "V8SF")
192 (V2DI "V4DI")
193 (V2DF "V4DF")
194 (V1TI "V2TI")])
195
196 ;; Iterators for loading constants with xxspltib
197 (define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
198 (define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
199
200 ;; Vector reverse byte modes
201 (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])
202
203 ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
204 ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
205 ;; done on ISA 2.07 and not just ISA 3.0.
206 (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
207 (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])
208 (define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI])
209
210 (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
211 (V8HI "h")
212 (V4SI "w")])
213
214 ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
215 ;; insert to validate the operand number.
216 (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
217 (V8HI "const_0_to_7_operand")
218 (V4SI "const_0_to_3_operand")])
219
220 ;; Mode attribute to give the constraint for vector extract and insert
221 ;; operations.
222 (define_mode_attr VSX_EX [(V16QI "v")
223 (V8HI "v")
224 (V4SI "wa")])
225
226 ;; Mode iterator for binary floating types other than double, used to
227 ;; optimize converting to that floating point type from an extract
228 ;; of an integer type.
229 (define_mode_iterator VSX_EXTRACT_FL [SF
230 (IF "FLOAT128_2REG_P (IFmode)")
231 (KF "TARGET_FLOAT128_HW")
232 (TF "FLOAT128_2REG_P (TFmode)
233 || (FLOAT128_IEEE_P (TFmode)
234 && TARGET_FLOAT128_HW)")])
235
236 ;; Mode iterator for binary floating types that have a direct conversion
237 ;; from 64-bit integer to floating point
238 (define_mode_iterator FL_CONV [SF
239 DF
240 (KF "TARGET_FLOAT128_HW")
241 (TF "TARGET_FLOAT128_HW
242 && FLOAT128_IEEE_P (TFmode)")])
243
244 ;; Iterator for the 2 short vector types to do a splat from an integer
245 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
246
247 ;; Mode attribute to give the count for the splat instruction to splat
248 ;; the value in the 64-bit integer slot
249 (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
250
251 ;; Mode attribute to give the suffix for the splat instruction
252 (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
253
254 ;; Iterator for the move to mask instructions
255 (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI])
256 (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI])
257
258 ;; Longer vec int modes for rotate/mask ops
259 ;; and Vector Integer Multiply/Divide/Modulo Instructions
260 (define_mode_iterator VIlong [V2DI V4SI])
261
262 ;; Constants for creating unspecs
263 (define_c_enum "unspec"
264 [UNSPEC_VSX_CONCAT
265 UNSPEC_VSX_CVDPSXWS
266 UNSPEC_VSX_CVDPUXWS
267 UNSPEC_VSX_CVSPDP
268 UNSPEC_VSX_CVHPSP
269 UNSPEC_VSX_CVSPDPN
270 UNSPEC_VSX_CVDPSPN
271 UNSPEC_VSX_CVSXWDP
272 UNSPEC_VSX_CVUXWDP
273 UNSPEC_VSX_CVSXDSP
274 UNSPEC_VSX_CVUXDSP
275 UNSPEC_VSX_FLOAT2
276 UNSPEC_VSX_UNS_FLOAT2
277 UNSPEC_VSX_FLOATE
278 UNSPEC_VSX_UNS_FLOATE
279 UNSPEC_VSX_FLOATO
280 UNSPEC_VSX_UNS_FLOATO
281 UNSPEC_VSX_TDIV
282 UNSPEC_VSX_TSQRT
283 UNSPEC_VSX_SET
284 UNSPEC_VSX_ROUND_I
285 UNSPEC_VSX_ROUND_IC
286 UNSPEC_VSX_SLDWI
287 UNSPEC_VSX_XXPERM
288
289 UNSPEC_VSX_XXSPLTW
290 UNSPEC_VSX_XXSPLTD
291 UNSPEC_VSX_DIVSD
292 UNSPEC_VSX_DIVUD
293 UNSPEC_VSX_DIVSQ
294 UNSPEC_VSX_DIVUQ
295 UNSPEC_VSX_DIVESQ
296 UNSPEC_VSX_DIVEUQ
297 UNSPEC_VSX_MODSQ
298 UNSPEC_VSX_MODUQ
299 UNSPEC_VSX_MULSD
300 UNSPEC_VSX_SIGN_EXTEND
301 UNSPEC_VSX_XVCVBF16SPN
302 UNSPEC_VSX_XVCVSPBF16
303 UNSPEC_VSX_XVCVSPSXDS
304 UNSPEC_VSX_XVCVSPHP
305 UNSPEC_VSX_VSLO
306 UNSPEC_VSX_EXTRACT
307 UNSPEC_VSX_SXEXPDP
308 UNSPEC_VSX_SXSIG
309 UNSPEC_VSX_SIEXPDP
310 UNSPEC_VSX_SIEXPQP
311 UNSPEC_VSX_SCMPEXPDP
312 UNSPEC_VSX_SCMPEXPQP
313 UNSPEC_VSX_STSTDC
314 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
315 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
316 UNSPEC_VSX_VXEXP
317 UNSPEC_VSX_VXSIG
318 UNSPEC_VSX_VIEXP
319 UNSPEC_VSX_VTSTDC
320 UNSPEC_VSX_VSIGNED2
321
322 UNSPEC_LXVL
323 UNSPEC_LXVLL
324 UNSPEC_LVSL_REG
325 UNSPEC_LVSR_REG
326 UNSPEC_STXVL
327 UNSPEC_STXVLL
328 UNSPEC_XL_LEN_R
329 UNSPEC_XST_LEN_R
330
331 UNSPEC_VCLZLSBB
332 UNSPEC_VCTZLSBB
333 UNSPEC_VEXTUBLX
334 UNSPEC_VEXTUHLX
335 UNSPEC_VEXTUWLX
336 UNSPEC_VEXTUBRX
337 UNSPEC_VEXTUHRX
338 UNSPEC_VEXTUWRX
339 UNSPEC_VCMPNEB
340 UNSPEC_VCMPNEZB
341 UNSPEC_VCMPNEH
342 UNSPEC_VCMPNEZH
343 UNSPEC_VCMPNEW
344 UNSPEC_VCMPNEZW
345 UNSPEC_XXEXTRACTUW
346 UNSPEC_XXINSERTW
347 UNSPEC_VSX_FIRST_MATCH_INDEX
348 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
349 UNSPEC_VSX_FIRST_MISMATCH_INDEX
350 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
351 UNSPEC_XXGENPCV
352 UNSPEC_MTVSBM
353 UNSPEC_EXTENDDITI2
354 UNSPEC_VCNTMB
355 UNSPEC_VEXPAND
356 UNSPEC_VEXTRACT
357 UNSPEC_EXTRACTL
358 UNSPEC_EXTRACTR
359 UNSPEC_INSERTL
360 UNSPEC_INSERTR
361 UNSPEC_REPLACE_ELT
362 UNSPEC_REPLACE_UN
363 UNSPEC_VDIVES
364 UNSPEC_VDIVEU
365 UNSPEC_VMSUMCUD
366 UNSPEC_XXEVAL
367 UNSPEC_XXSPLTIW
368 UNSPEC_XXSPLTIDP
369 UNSPEC_XXSPLTI32DX
370 UNSPEC_XXBLEND
371 UNSPEC_XXPERMX
372 ])
373
374 (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
375 UNSPEC_VSX_XVCVBF16SPN])
376
377 (define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
378 (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])
379
380 ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
381 (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
382
383 ;; Vector replace_elt iterator/attr for 32-bit and 64-bit elements
384 (define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF])
385 (define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w")
386 (V2DI "d") (V2DF "d")])
387 (define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2")
388 (V2DI "3") (V2DF "3")])
389 (define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12")
390 (V2DI "8") (V2DF "8")])
391
392 ;; Like VM2 in altivec.md, just do char, short, int, long, float and double
393 (define_mode_iterator VM3 [V4SI
394 V8HI
395 V16QI
396 V4SF
397 V2DF
398 V2DI])
399
400 (define_mode_attr VM3_char [(V2DI "d")
401 (V4SI "w")
402 (V8HI "h")
403 (V16QI "b")
404 (V2DF "d")
405 (V4SF "w")])
406
407
408 ;; VSX moves
409
410 ;; The patterns for LE permuted loads and stores come before the general
411 ;; VSX moves so they match first.
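;;
;; As an illustrative sketch (register numbers are made up), a little-endian
;; V2DF load on a pre-POWER9 VSX target is expected to split into an
;; element-reversing load plus a permute that restores element order:
;;
;;	lxvd2x   vs0,0,r3	# doublewords arrive swapped on LE
;;	xxpermdi vs0,vs0,vs0,2	# swap them back into element order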
412 (define_insn_and_split "*vsx_le_perm_load_<mode>"
413 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
414 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
415 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
416 "#"
417 "&& 1"
418 [(set (match_dup 2)
419 (vec_select:<MODE>
420 (match_dup 1)
421 (parallel [(const_int 1) (const_int 0)])))
422 (set (match_dup 0)
423 (vec_select:<MODE>
424 (match_dup 2)
425 (parallel [(const_int 1) (const_int 0)])))]
426 {
427 rtx mem = operands[1];
428
429 /* Don't apply the swap optimization if we've already performed register
430 allocation and the hard register destination is not in the altivec
431 range. */
432 if ((MEM_ALIGN (mem) >= 128)
433 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
434 || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
435 {
436 rtx mem_address = XEXP (mem, 0);
437 enum machine_mode mode = GET_MODE (mem);
438
439 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
440 {
441 /* Replace the source memory address with masked address. */
442 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
443 emit_insn (lvx_set_expr);
444 DONE;
445 }
446 else if (rs6000_quadword_masked_address_p (mem_address))
447 {
448 /* This rtl is already in the form that matches lvx
449 instruction, so leave it alone. */
450 DONE;
451 }
452 /* Otherwise, fall through to transform into a swapping load. */
453 }
454 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
455 : operands[0];
456 }
457 [(set_attr "type" "vecload")
458 (set_attr "length" "8")])
459
460 (define_insn_and_split "*vsx_le_perm_load_<mode>"
461 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
462 (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
463 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
464 "#"
465 "&& 1"
466 [(set (match_dup 2)
467 (vec_select:<MODE>
468 (match_dup 1)
469 (parallel [(const_int 2) (const_int 3)
470 (const_int 0) (const_int 1)])))
471 (set (match_dup 0)
472 (vec_select:<MODE>
473 (match_dup 2)
474 (parallel [(const_int 2) (const_int 3)
475 (const_int 0) (const_int 1)])))]
476 {
477 rtx mem = operands[1];
478
479 /* Don't apply the swap optimization if we've already performed register
480 allocation and the hard register destination is not in the altivec
481 range. */
482 if ((MEM_ALIGN (mem) >= 128)
483 && (!HARD_REGISTER_P (operands[0])
484 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
485 {
486 rtx mem_address = XEXP (mem, 0);
487 enum machine_mode mode = GET_MODE (mem);
488
489 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
490 {
491 /* Replace the source memory address with masked address. */
492 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
493 emit_insn (lvx_set_expr);
494 DONE;
495 }
496 else if (rs6000_quadword_masked_address_p (mem_address))
497 {
498 /* This rtl is already in the form that matches lvx
499 instruction, so leave it alone. */
500 DONE;
501 }
502 /* Otherwise, fall through to transform into a swapping load. */
503 }
504 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
505 : operands[0];
506 }
507 [(set_attr "type" "vecload")
508 (set_attr "length" "8")])
509
510 (define_insn_and_split "*vsx_le_perm_load_v8hi"
511 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
512 (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
513 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
514 "#"
515 "&& 1"
516 [(set (match_dup 2)
517 (vec_select:V8HI
518 (match_dup 1)
519 (parallel [(const_int 4) (const_int 5)
520 (const_int 6) (const_int 7)
521 (const_int 0) (const_int 1)
522 (const_int 2) (const_int 3)])))
523 (set (match_dup 0)
524 (vec_select:V8HI
525 (match_dup 2)
526 (parallel [(const_int 4) (const_int 5)
527 (const_int 6) (const_int 7)
528 (const_int 0) (const_int 1)
529 (const_int 2) (const_int 3)])))]
530 {
531 rtx mem = operands[1];
532
533 /* Don't apply the swap optimization if we've already performed register
534 allocation and the hard register destination is not in the altivec
535 range. */
536 if ((MEM_ALIGN (mem) >= 128)
537 && (!HARD_REGISTER_P (operands[0])
538 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
539 {
540 rtx mem_address = XEXP (mem, 0);
541 enum machine_mode mode = GET_MODE (mem);
542
543 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
544 {
545 /* Replace the source memory address with masked address. */
546 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
547 emit_insn (lvx_set_expr);
548 DONE;
549 }
550 else if (rs6000_quadword_masked_address_p (mem_address))
551 {
552 /* This rtl is already in the form that matches lvx
553 instruction, so leave it alone. */
554 DONE;
555 }
556 /* Otherwise, fall through to transform into a swapping load. */
557 }
558 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
559 : operands[0];
560 }
561 [(set_attr "type" "vecload")
562 (set_attr "length" "8")])
563
564 (define_insn_and_split "*vsx_le_perm_load_v16qi"
565 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
566 (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
567 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
568 "#"
569 "&& 1"
570 [(set (match_dup 2)
571 (vec_select:V16QI
572 (match_dup 1)
573 (parallel [(const_int 8) (const_int 9)
574 (const_int 10) (const_int 11)
575 (const_int 12) (const_int 13)
576 (const_int 14) (const_int 15)
577 (const_int 0) (const_int 1)
578 (const_int 2) (const_int 3)
579 (const_int 4) (const_int 5)
580 (const_int 6) (const_int 7)])))
581 (set (match_dup 0)
582 (vec_select:V16QI
583 (match_dup 2)
584 (parallel [(const_int 8) (const_int 9)
585 (const_int 10) (const_int 11)
586 (const_int 12) (const_int 13)
587 (const_int 14) (const_int 15)
588 (const_int 0) (const_int 1)
589 (const_int 2) (const_int 3)
590 (const_int 4) (const_int 5)
591 (const_int 6) (const_int 7)])))]
592 {
593 rtx mem = operands[1];
594
595 /* Don't apply the swap optimization if we've already performed register
596 allocation and the hard register destination is not in the altivec
597 range. */
598 if ((MEM_ALIGN (mem) >= 128)
599 && (!HARD_REGISTER_P (operands[0])
600 || ALTIVEC_REGNO_P (REGNO (operands[0]))))
601 {
602 rtx mem_address = XEXP (mem, 0);
603 enum machine_mode mode = GET_MODE (mem);
604
605 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
606 {
607 /* Replace the source memory address with masked address. */
608 rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
609 emit_insn (lvx_set_expr);
610 DONE;
611 }
612 else if (rs6000_quadword_masked_address_p (mem_address))
613 {
614 /* This rtl is already in the form that matches lvx
615 instruction, so leave it alone. */
616 DONE;
617 }
618 /* Otherwise, fall through to transform into a swapping load. */
619 }
620 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
621 : operands[0];
622 }
623 [(set_attr "type" "vecload")
624 (set_attr "length" "8")])
625
626 (define_insn "*vsx_le_perm_store_<mode>"
627 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
628 (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
629 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
630 "#"
631 [(set_attr "type" "vecstore")
632 (set_attr "length" "12")])
633
634 (define_split
635 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
636 (match_operand:VSX_D 1 "vsx_register_operand"))]
637 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
638 [(set (match_dup 2)
639 (vec_select:<MODE>
640 (match_dup 1)
641 (parallel [(const_int 1) (const_int 0)])))
642 (set (match_dup 0)
643 (vec_select:<MODE>
644 (match_dup 2)
645 (parallel [(const_int 1) (const_int 0)])))]
646 {
647 rtx mem = operands[0];
648
649 /* Don't apply the swap optimization if we've already performed register
650 allocation and the hard register source is not in the altivec range. */
651 if ((MEM_ALIGN (mem) >= 128)
652 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
653 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
654 {
655 rtx mem_address = XEXP (mem, 0);
656 enum machine_mode mode = GET_MODE (mem);
657 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
658 {
659 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
660 emit_insn (stvx_set_expr);
661 DONE;
662 }
663 else if (rs6000_quadword_masked_address_p (mem_address))
664 {
665 /* This rtl is already in the form that matches stvx instruction,
666 so leave it alone. */
667 DONE;
668 }
669 /* Otherwise, fall through to transform into a swapping store. */
670 }
671
672 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
673 : operands[1];
674 })
675
676 ;; The post-reload split requires that we re-permute the source
677 ;; register in case it is still live.
678 (define_split
679 [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
680 (match_operand:VSX_D 1 "vsx_register_operand"))]
681 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
682 [(set (match_dup 1)
683 (vec_select:<MODE>
684 (match_dup 1)
685 (parallel [(const_int 1) (const_int 0)])))
686 (set (match_dup 0)
687 (vec_select:<MODE>
688 (match_dup 1)
689 (parallel [(const_int 1) (const_int 0)])))
690 (set (match_dup 1)
691 (vec_select:<MODE>
692 (match_dup 1)
693 (parallel [(const_int 1) (const_int 0)])))]
694 "")
695
696 (define_insn "*vsx_le_perm_store_<mode>"
697 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
698 (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
699 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
700 "#"
701 [(set_attr "type" "vecstore")
702 (set_attr "length" "12")])
703
704 (define_split
705 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
706 (match_operand:VSX_W 1 "vsx_register_operand"))]
707 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
708 [(set (match_dup 2)
709 (vec_select:<MODE>
710 (match_dup 1)
711 (parallel [(const_int 2) (const_int 3)
712 (const_int 0) (const_int 1)])))
713 (set (match_dup 0)
714 (vec_select:<MODE>
715 (match_dup 2)
716 (parallel [(const_int 2) (const_int 3)
717 (const_int 0) (const_int 1)])))]
718 {
719 rtx mem = operands[0];
720
721 /* Don't apply the swap optimization if we've already performed register
722 allocation and the hard register source is not in the altivec range. */
723 if ((MEM_ALIGN (mem) >= 128)
724 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
725 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
726 {
727 rtx mem_address = XEXP (mem, 0);
728 enum machine_mode mode = GET_MODE (mem);
729 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
730 {
731 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
732 emit_insn (stvx_set_expr);
733 DONE;
734 }
735 else if (rs6000_quadword_masked_address_p (mem_address))
736 {
737 /* This rtl is already in the form that matches stvx instruction,
738 so leave it alone. */
739 DONE;
740 }
741 /* Otherwise, fall through to transform into a swapping store. */
742 }
743
744 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
745 : operands[1];
746 })
747
748 ;; The post-reload split requires that we re-permute the source
749 ;; register in case it is still live.
750 (define_split
751 [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
752 (match_operand:VSX_W 1 "vsx_register_operand"))]
753 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
754 [(set (match_dup 1)
755 (vec_select:<MODE>
756 (match_dup 1)
757 (parallel [(const_int 2) (const_int 3)
758 (const_int 0) (const_int 1)])))
759 (set (match_dup 0)
760 (vec_select:<MODE>
761 (match_dup 1)
762 (parallel [(const_int 2) (const_int 3)
763 (const_int 0) (const_int 1)])))
764 (set (match_dup 1)
765 (vec_select:<MODE>
766 (match_dup 1)
767 (parallel [(const_int 2) (const_int 3)
768 (const_int 0) (const_int 1)])))]
769 "")
770
771 (define_insn "*vsx_le_perm_store_v8hi"
772 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
773 (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
774 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
775 "#"
776 [(set_attr "type" "vecstore")
777 (set_attr "length" "12")])
778
779 (define_split
780 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
781 (match_operand:V8HI 1 "vsx_register_operand"))]
782 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
783 [(set (match_dup 2)
784 (vec_select:V8HI
785 (match_dup 1)
786 (parallel [(const_int 4) (const_int 5)
787 (const_int 6) (const_int 7)
788 (const_int 0) (const_int 1)
789 (const_int 2) (const_int 3)])))
790 (set (match_dup 0)
791 (vec_select:V8HI
792 (match_dup 2)
793 (parallel [(const_int 4) (const_int 5)
794 (const_int 6) (const_int 7)
795 (const_int 0) (const_int 1)
796 (const_int 2) (const_int 3)])))]
797 {
798 rtx mem = operands[0];
799
800 /* Don't apply the swap optimization if we've already performed register
801 allocation and the hard register source is not in the altivec range. */
802 if ((MEM_ALIGN (mem) >= 128)
803 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
804 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
805 {
806 rtx mem_address = XEXP (mem, 0);
807 enum machine_mode mode = GET_MODE (mem);
808 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
809 {
810 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
811 emit_insn (stvx_set_expr);
812 DONE;
813 }
814 else if (rs6000_quadword_masked_address_p (mem_address))
815 {
816 /* This rtl is already in the form that matches stvx instruction,
817 so leave it alone. */
818 DONE;
819 }
820 /* Otherwise, fall through to transform into a swapping store. */
821 }
822
823 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
824 : operands[1];
825 })
826
827 ;; The post-reload split requires that we re-permute the source
828 ;; register in case it is still live.
829 (define_split
830 [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
831 (match_operand:V8HI 1 "vsx_register_operand"))]
832 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
833 [(set (match_dup 1)
834 (vec_select:V8HI
835 (match_dup 1)
836 (parallel [(const_int 4) (const_int 5)
837 (const_int 6) (const_int 7)
838 (const_int 0) (const_int 1)
839 (const_int 2) (const_int 3)])))
840 (set (match_dup 0)
841 (vec_select:V8HI
842 (match_dup 1)
843 (parallel [(const_int 4) (const_int 5)
844 (const_int 6) (const_int 7)
845 (const_int 0) (const_int 1)
846 (const_int 2) (const_int 3)])))
847 (set (match_dup 1)
848 (vec_select:V8HI
849 (match_dup 1)
850 (parallel [(const_int 4) (const_int 5)
851 (const_int 6) (const_int 7)
852 (const_int 0) (const_int 1)
853 (const_int 2) (const_int 3)])))]
854 "")
855
856 (define_insn "*vsx_le_perm_store_v16qi"
857 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
858 (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
859 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
860 "#"
861 [(set_attr "type" "vecstore")
862 (set_attr "length" "12")])
863
864 (define_split
865 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
866 (match_operand:V16QI 1 "vsx_register_operand"))]
867 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
868 [(set (match_dup 2)
869 (vec_select:V16QI
870 (match_dup 1)
871 (parallel [(const_int 8) (const_int 9)
872 (const_int 10) (const_int 11)
873 (const_int 12) (const_int 13)
874 (const_int 14) (const_int 15)
875 (const_int 0) (const_int 1)
876 (const_int 2) (const_int 3)
877 (const_int 4) (const_int 5)
878 (const_int 6) (const_int 7)])))
879 (set (match_dup 0)
880 (vec_select:V16QI
881 (match_dup 2)
882 (parallel [(const_int 8) (const_int 9)
883 (const_int 10) (const_int 11)
884 (const_int 12) (const_int 13)
885 (const_int 14) (const_int 15)
886 (const_int 0) (const_int 1)
887 (const_int 2) (const_int 3)
888 (const_int 4) (const_int 5)
889 (const_int 6) (const_int 7)])))]
890 {
891 rtx mem = operands[0];
892
893 /* Don't apply the swap optimization if we've already performed register
894 allocation and the hard register source is not in the altivec range. */
895 if ((MEM_ALIGN (mem) >= 128)
896 && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
897 || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
898 {
899 rtx mem_address = XEXP (mem, 0);
900 enum machine_mode mode = GET_MODE (mem);
901 if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
902 {
903 rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
904 emit_insn (stvx_set_expr);
905 DONE;
906 }
907 else if (rs6000_quadword_masked_address_p (mem_address))
908 {
909 /* This rtl is already in the form that matches stvx instruction,
910 so leave it alone. */
911 DONE;
912 }
913 /* Otherwise, fall through to transform into a swapping store. */
914 }
915
916 operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
917 : operands[1];
918 })
919
920 ;; The post-reload split requires that we re-permute the source
921 ;; register in case it is still live.
922 (define_split
923 [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
924 (match_operand:V16QI 1 "vsx_register_operand"))]
925 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
926 [(set (match_dup 1)
927 (vec_select:V16QI
928 (match_dup 1)
929 (parallel [(const_int 8) (const_int 9)
930 (const_int 10) (const_int 11)
931 (const_int 12) (const_int 13)
932 (const_int 14) (const_int 15)
933 (const_int 0) (const_int 1)
934 (const_int 2) (const_int 3)
935 (const_int 4) (const_int 5)
936 (const_int 6) (const_int 7)])))
937 (set (match_dup 0)
938 (vec_select:V16QI
939 (match_dup 1)
940 (parallel [(const_int 8) (const_int 9)
941 (const_int 10) (const_int 11)
942 (const_int 12) (const_int 13)
943 (const_int 14) (const_int 15)
944 (const_int 0) (const_int 1)
945 (const_int 2) (const_int 3)
946 (const_int 4) (const_int 5)
947 (const_int 6) (const_int 7)])))
948 (set (match_dup 1)
949 (vec_select:V16QI
950 (match_dup 1)
951 (parallel [(const_int 8) (const_int 9)
952 (const_int 10) (const_int 11)
953 (const_int 12) (const_int 13)
954 (const_int 14) (const_int 15)
955 (const_int 0) (const_int 1)
956 (const_int 2) (const_int 3)
957 (const_int 4) (const_int 5)
958 (const_int 6) (const_int 7)])))]
959 "")
960
961 ;; Little endian word swapping for 128-bit types that are either scalars or the
962 ;; special V1TI container class, for which it is not appropriate to use
963 ;; vec_select on the type.
964 (define_insn "*vsx_le_permute_<mode>"
965 [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
966 (rotate:VEC_TI
967 (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
968 (const_int 64)))]
969 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
970 "@
971 xxpermdi %x0,%x1,%x1,2
972 lxvd2x %x0,%y1
973 stxvd2x %x1,%y0
974 mr %0,%L1\;mr %L0,%1
975 ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
976 std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
977 [(set_attr "length" "*,*,*,8,8,8")
978 (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
979
980 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
981 [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa")
982 (rotate:VEC_TI
983 (rotate:VEC_TI
984 (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa")
985 (const_int 64))
986 (const_int 64)))]
987 "!BYTES_BIG_ENDIAN && TARGET_VSX"
988 "@
989 #
990 xxlor %x0,%x1"
991 "&& 1"
992 [(set (match_dup 0) (match_dup 1))]
993 {
994 if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
995 {
996 emit_note (NOTE_INSN_DELETED);
997 DONE;
998 }
999 }
1000 [(set_attr "length" "0,4")
1001 (set_attr "type" "veclogical")])
1002
1003 (define_insn_and_split "*vsx_le_perm_load_<mode>"
1004 [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
1005 (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
1006 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1007 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
1008 "@
1009 #
1010 #"
1011 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1012 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
1013 [(const_int 0)]
1014 {
1015 rtx tmp = (can_create_pseudo_p ()
1016 ? gen_reg_rtx_and_attrs (operands[0])
1017 : operands[0]);
1018 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1019 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1020 DONE;
1021 }
1022 [(set_attr "type" "vecload,load")
1023 (set_attr "length" "8,8")
1024 (set_attr "isa" "<VSisa>,*")])
1025
1026 (define_insn "*vsx_le_perm_store_<mode>"
1027 [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
1028 (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
1029 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1030 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1031 "@
1032 #
1033 #"
1034 [(set_attr "type" "vecstore,store")
1035 (set_attr "length" "12,8")
1036 (set_attr "isa" "<VSisa>,*")])
1037
1038 (define_split
1039 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1040 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1041 "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
1042 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1043 [(const_int 0)]
1044 {
1045 rtx tmp = (can_create_pseudo_p ()
1046 ? gen_reg_rtx_and_attrs (operands[0])
1047 : operands[0]);
1048 rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
1049 rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
1050 DONE;
1051 })
1052
1053 ;; Peepholes to catch loads and stores for TImode if TImode landed in
1054 ;; GPR registers on a little endian system.
1055 (define_peephole2
1056 [(set (match_operand:VEC_TI 0 "int_reg_operand")
1057 (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand")
1058 (const_int 64)))
1059 (set (match_operand:VEC_TI 2 "int_reg_operand")
1060 (rotate:VEC_TI (match_dup 0)
1061 (const_int 64)))]
1062 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1063 && (rtx_equal_p (operands[0], operands[2])
1064 || peep2_reg_dead_p (2, operands[0]))"
1065 [(set (match_dup 2) (match_dup 1))])
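;; i.e. when the intermediate register is dead (or is the final destination),
;; the load-with-swap followed by the swap-into-place collapses back into a
;; plain TImode load from memory.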
1066
1067 (define_peephole2
1068 [(set (match_operand:VEC_TI 0 "int_reg_operand")
1069 (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand")
1070 (const_int 64)))
1071 (set (match_operand:VEC_TI 2 "memory_operand")
1072 (rotate:VEC_TI (match_dup 0)
1073 (const_int 64)))]
1074 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1075 && peep2_reg_dead_p (2, operands[0])"
1076 [(set (match_dup 2) (match_dup 1))])
1077
1078 ;; Peephole to catch the rotate pairs that memory-to-memory TImode transfers
1079 ;; leave in VSX registers on a little endian system. The vector types and IEEE
1080 ;; 128-bit floating point are handled by the more generic swap elimination pass.
1081 (define_peephole2
1082 [(set (match_operand:TI 0 "vsx_register_operand")
1083 (rotate:TI (match_operand:TI 1 "vsx_register_operand")
1084 (const_int 64)))
1085 (set (match_operand:TI 2 "vsx_register_operand")
1086 (rotate:TI (match_dup 0)
1087 (const_int 64)))]
1088 "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
1089 && (rtx_equal_p (operands[0], operands[2])
1090 || peep2_reg_dead_p (2, operands[0]))"
1091 [(set (match_dup 2) (match_dup 1))])
1092
1093 ;; The post-reload split requires that we re-permute the source
1094 ;; register in case it is still live.
1095 (define_split
1096 [(set (match_operand:VSX_LE_128 0 "memory_operand")
1097 (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
1098 "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
1099 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
1100 [(const_int 0)]
1101 {
1102 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1103 rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
1104 rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
1105 DONE;
1106 })
1107
1108 ;; Vector constants that can be generated with XXSPLTIB, which was added in
1109 ;; ISA 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
1110 (define_insn "xxspltib_v16qi"
1111 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1112 (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
1113 "TARGET_P9_VECTOR"
1114 {
1115 operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
1116 return "xxspltib %x0,%2";
1117 }
1118 [(set_attr "type" "vecperm")])
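;; e.g. (vec_duplicate:V16QI (const_int 5)) is emitted as "xxspltib %x0,5"
;; instead of a constant-pool load.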
1119
1120 (define_insn "xxspltib_<mode>_nosplit"
1121 [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
1122 (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
1123 "TARGET_P9_VECTOR"
1124 {
1125 rtx op1 = operands[1];
1126 int value = 256;
1127 int num_insns = -1;
1128
1129 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1130 || num_insns != 1)
1131 gcc_unreachable ();
1132
1133 operands[2] = GEN_INT (value & 0xff);
1134 return "xxspltib %x0,%2";
1135 }
1136 [(set_attr "type" "vecperm")])
1137
1138 (define_insn_and_split "*xxspltib_<mode>_split"
1139 [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
1140 (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
1141 "TARGET_P9_VECTOR"
1142 "#"
1143 "&& 1"
1144 [(const_int 0)]
1145 {
1146 int value = 256;
1147 int num_insns = -1;
1148 rtx op0 = operands[0];
1149 rtx op1 = operands[1];
1150 rtx tmp = ((can_create_pseudo_p ())
1151 ? gen_reg_rtx (V16QImode)
1152 : gen_lowpart (V16QImode, op0));
1153
1154 if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
1155 || num_insns != 2)
1156 gcc_unreachable ();
1157
1158 emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
1159
1160 if (<MODE>mode == V2DImode)
1161 emit_insn (gen_vsx_sign_extend_v16qi_v2di (op0, tmp));
1162
1163 else if (<MODE>mode == V4SImode)
1164 emit_insn (gen_vsx_sign_extend_v16qi_v4si (op0, tmp));
1165
1166 else if (<MODE>mode == V8HImode)
1167 emit_insn (gen_altivec_vupkhsb (op0, tmp));
1168
1169 else
1170 gcc_unreachable ();
1171
1172 DONE;
1173 }
1174 [(set_attr "type" "vecperm")
1175 (set_attr "length" "8")])
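;;
;; Illustrative two-instruction split for a V4SI splat of the byte value 34
;; (made-up register numbers; vsx_sign_extend_v16qi_v4si emits vextsb2w,
;; the V2DI case uses vextsb2d, and V8HI uses vupkhsb):
;;
;;	xxspltib vs32,34	# splat 34 into all 16 byte lanes
;;	vextsb2w v0,v0		# sign-extend the low byte of each word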
1176
1177
1178 ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB
1179 ;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
1180 ;; all 1's, since the machine does not have to wait for the previous
1181 ;; instruction using the register being set (such as a store waiting on a slow
1182 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
1183
1184 ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
1185 ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
1186 ;; LXVKQ XXSPLTI*
1187 ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
1188 (define_insn "vsx_mov<mode>_64bit"
1189 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1190 "=ZwO, wa, wa, r, we, ?wQ,
1191 ?&r, ??r, ??Y, <??r>, wa, v,
1192 wa, wa,
1193 ?wa, v, <??r>, wZ, v")
1194
1195 (match_operand:VSX_M 1 "input_operand"
1196 "wa, ZwO, wa, we, r, r,
1197 wQ, Y, r, r, wE, jwM,
1198 eQ, eP,
1199 ?jwM, W, <nW>, v, wZ"))]
1200
1201 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1202 && (register_operand (operands[0], <MODE>mode)
1203 || register_operand (operands[1], <MODE>mode))"
1204 {
1205 return rs6000_output_move_128bit (operands);
1206 }
1207 [(set_attr "type"
1208 "vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
1209 store, load, store, *, vecsimple, vecsimple,
1210 vecperm, vecperm,
1211 vecsimple, *, *, vecstore, vecload")
1212 (set_attr "num_insns"
1213 "*, *, *, 2, *, 2,
1214 2, 2, 2, 2, *, *,
1215 *, *,
1216 *, 5, 2, *, *")
1217 (set_attr "max_prefixed_insns"
1218 "*, *, *, *, *, 2,
1219 2, 2, 2, 2, *, *,
1220 *, *,
1221 *, *, *, *, *")
1222 (set_attr "length"
1223 "*, *, *, 8, *, 8,
1224 8, 8, 8, 8, *, *,
1225 *, *,
1226 *, 20, 8, *, *")
1227 (set_attr "isa"
1228 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1229 *, *, *, *, p9v, *,
1230 p10, p10,
1231 <VSisa>, *, *, *, *")
1232 (set_attr "prefixed"
1233 "*, *, *, *, *, *,
1234 *, *, *, *, *, *,
1235 *, yes,
1236 *, *, *, *, *")])
1237
1238 ;; VSX store VSX load VSX move GPR load GPR store GPR move
1239 ;; LXVKQ XXSPLTI*
1240 ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
1241 ;; LVX (VMX) STVX (VMX)
1242 (define_insn "*vsx_mov<mode>_32bit"
1243 [(set (match_operand:VSX_M 0 "nonimmediate_operand"
1244 "=ZwO, wa, wa, ??r, ??Y, <??r>,
1245 wa, wa,
1246 wa, v, ?wa, v, <??r>,
1247 wZ, v")
1248
1249 (match_operand:VSX_M 1 "input_operand"
1250 "wa, ZwO, wa, Y, r, r,
1251 eQ, eP,
1252 wE, jwM, ?jwM, W, <nW>,
1253 v, wZ"))]
1254
1255 "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
1256 && (register_operand (operands[0], <MODE>mode)
1257 || register_operand (operands[1], <MODE>mode))"
1258 {
1259 return rs6000_output_move_128bit (operands);
1260 }
1261 [(set_attr "type"
1262 "vecstore, vecload, vecsimple, load, store, *,
1263 vecperm, vecperm,
1264 vecsimple, vecsimple, vecsimple, *, *,
1265 vecstore, vecload")
1266 (set_attr "length"
1267 "*, *, *, 16, 16, 16,
1268 *, *,
1269 *, *, *, 20, 16,
1270 *, *")
1271 (set_attr "isa"
1272 "<VSisa>, <VSisa>, <VSisa>, *, *, *,
1273 p10, p10,
1274 p9v, *, <VSisa>, *, *,
1275 *, *")
1276 (set_attr "prefixed"
1277 "*, *, *, *, *, *,
1278 *, yes,
1279 *, *, *, *, *,
1280 *, *")])
1281
1282 ;; Explicit load/store expanders for the builtin functions
1283 (define_expand "vsx_load_<mode>"
1284 [(set (match_operand:VSX_M 0 "vsx_register_operand")
1285 (match_operand:VSX_M 1 "memory_operand"))]
1286 "VECTOR_MEM_VSX_P (<MODE>mode)"
1287 {
1288 /* Expand to swaps if needed, prior to swap optimization. */
1289 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1290 && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode))
1291 {
1292 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1293 DONE;
1294 }
1295 })
1296
1297 (define_expand "vsx_store_<mode>"
1298 [(set (match_operand:VSX_M 0 "memory_operand")
1299 (match_operand:VSX_M 1 "vsx_register_operand"))]
1300 "VECTOR_MEM_VSX_P (<MODE>mode)"
1301 {
1302 /* Expand to swaps if needed, prior to swap optimization. */
1303 if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
1304 && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode))
1305 {
1306 rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
1307 DONE;
1308 }
1309 })
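;;
;; A hedged C-level sketch of how these expanders are typically reached
;; (vec_vsx_ld/vec_vsx_st are the altivec.h built-ins; the exact routing can
;; differ by target and GCC version):
;;
;;	#include <altivec.h>
;;	vector double ld2 (const vector double *p) { return vec_vsx_ld (0, p); }
;;	void st2 (vector double *p, vector double v) { vec_vsx_st (v, 0, p); }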
1310
1311 ;; Load rightmost element from load_data
1312 ;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
1313 (define_insn "vsx_lxvr<wd>x"
1314 [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
1315 (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
1316 "TARGET_POWER10"
1317 "lxvr<wd>x %x0,%y1"
1318 [(set_attr "type" "vecload")])
1319
1320 ;; Store rightmost element into store_data
1321 ;; using stxvrbx, stxvrhx, stxvrwx, stxvrdx.
1322 (define_insn "vsx_stxvr<wd>x"
1323 [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
1324 (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
1325 "TARGET_POWER10"
1326 "stxvr<wd>x %x1,%y0"
1327 [(set_attr "type" "vecstore")])
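;; e.g. "stxvrwx %x1,%y0" stores only the rightmost (least-significant) word
;; of the 128-bit source, matching the truncate:SI of the TImode register in
;; the pattern above.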
1328
1329 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
1330 ;; when you really want their element-reversing behavior.
1331 (define_insn "vsx_ld_elemrev_v2di"
1332 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1333 (vec_select:V2DI
1334 (match_operand:V2DI 1 "memory_operand" "Z")
1335 (parallel [(const_int 1) (const_int 0)])))]
1336 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1337 "lxvd2x %x0,%y1"
1338 [(set_attr "type" "vecload")])
1339
1340 (define_insn "vsx_ld_elemrev_v1ti"
1341 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
1342 (vec_select:V1TI
1343 (match_operand:V1TI 1 "memory_operand" "Z")
1344 (parallel [(const_int 0)])))]
1345 "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
1346 {
1347 return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
1348 }
1349 [(set_attr "type" "vecload")])
1350
1351 (define_insn "vsx_ld_elemrev_v2df"
1352 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
1353 (vec_select:V2DF
1354 (match_operand:V2DF 1 "memory_operand" "Z")
1355 (parallel [(const_int 1) (const_int 0)])))]
1356 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1357 "lxvd2x %x0,%y1"
1358 [(set_attr "type" "vecload")])
1359
1360 (define_insn "vsx_ld_elemrev_v4si"
1361 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
1362 (vec_select:V4SI
1363 (match_operand:V4SI 1 "memory_operand" "Z")
1364 (parallel [(const_int 3) (const_int 2)
1365 (const_int 1) (const_int 0)])))]
1366 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1367 "lxvw4x %x0,%y1"
1368 [(set_attr "type" "vecload")])
1369
1370 (define_insn "vsx_ld_elemrev_v4sf"
1371 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
1372 (vec_select:V4SF
1373 (match_operand:V4SF 1 "memory_operand" "Z")
1374 (parallel [(const_int 3) (const_int 2)
1375 (const_int 1) (const_int 0)])))]
1376 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1377 "lxvw4x %x0,%y1"
1378 [(set_attr "type" "vecload")])
1379
1380 (define_expand "vsx_ld_elemrev_v8hi"
1381 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1382 (vec_select:V8HI
1383 (match_operand:V8HI 1 "memory_operand" "Z")
1384 (parallel [(const_int 7) (const_int 6)
1385 (const_int 5) (const_int 4)
1386 (const_int 3) (const_int 2)
1387 (const_int 1) (const_int 0)])))]
1388 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1389 {
1390 if (!TARGET_P9_VECTOR)
1391 {
1392 rtx tmp = gen_reg_rtx (V4SImode);
1393 rtx subreg, subreg2, perm[16], pcv;
1394 /* 2 is leftmost element in register.  */
1395 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1396 int i;
1397
1398 subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
1399 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1400 subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);
1401
1402 for (i = 0; i < 16; ++i)
1403 perm[i] = GEN_INT (reorder[i]);
1404
1405 pcv = force_reg (V16QImode,
1406 gen_rtx_CONST_VECTOR (V16QImode,
1407 gen_rtvec_v (16, perm)));
1408 emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
1409 subreg2, pcv));
1410 DONE;
1411 }
1412 })
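;; In the !TARGET_P9_VECTOR fallback above, lxvw4x (via vsx_ld_elemrev_v4si)
;; reverses the four words, and the vperm control then swaps the two
;; halfwords inside each word: e.g. V8HI result element 0 is assembled from
;; bytes 13 and 12 of the word-reversed image, per the reorder[] table.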
1413
1414 (define_insn "*vsx_ld_elemrev_v8hi_internal"
1415 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
1416 (vec_select:V8HI
1417 (match_operand:V8HI 1 "memory_operand" "Z")
1418 (parallel [(const_int 7) (const_int 6)
1419 (const_int 5) (const_int 4)
1420 (const_int 3) (const_int 2)
1421 (const_int 1) (const_int 0)])))]
1422 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1423 "lxvh8x %x0,%y1"
1424 [(set_attr "type" "vecload")])
1425
1426 (define_expand "vsx_ld_elemrev_v16qi"
1427 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1428 (vec_select:V16QI
1429 (match_operand:V16QI 1 "memory_operand" "Z")
1430 (parallel [(const_int 15) (const_int 14)
1431 (const_int 13) (const_int 12)
1432 (const_int 11) (const_int 10)
1433 (const_int 9) (const_int 8)
1434 (const_int 7) (const_int 6)
1435 (const_int 5) (const_int 4)
1436 (const_int 3) (const_int 2)
1437 (const_int 1) (const_int 0)])))]
1438 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1439 {
1440 if (!TARGET_P9_VECTOR)
1441 {
1442 rtx tmp = gen_reg_rtx (V4SImode);
1443 rtx subreg, subreg2, perm[16], pcv;
1444 /* 3 is leftmost element in register.  */
1445 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1446 int i;
1447
1448 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
1449 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
1450 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);
1451
1452 for (i = 0; i < 16; ++i)
1453 perm[i] = GEN_INT (reorder[i]);
1454
1455 pcv = force_reg (V16QImode,
1456 gen_rtx_CONST_VECTOR (V16QImode,
1457 gen_rtvec_v (16, perm)));
1458 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
1459 subreg2, pcv));
1460 DONE;
1461 }
1462 })
1463
1464 (define_insn "vsx_ld_elemrev_v16qi_internal"
1465 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
1466 (vec_select:V16QI
1467 (match_operand:V16QI 1 "memory_operand" "Z")
1468 (parallel [(const_int 15) (const_int 14)
1469 (const_int 13) (const_int 12)
1470 (const_int 11) (const_int 10)
1471 (const_int 9) (const_int 8)
1472 (const_int 7) (const_int 6)
1473 (const_int 5) (const_int 4)
1474 (const_int 3) (const_int 2)
1475 (const_int 1) (const_int 0)])))]
1476 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1477 "lxvb16x %x0,%y1"
1478 [(set_attr "type" "vecload")])
1479
1480 (define_insn "vsx_st_elemrev_v1ti"
1481 [(set (match_operand:V1TI 0 "memory_operand" "=Z")
1482 (vec_select:V1TI
1483 (match_operand:V1TI 1 "vsx_register_operand" "+wa")
1484 (parallel [(const_int 0)])))
1485 (clobber (match_dup 1))]
1486 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1487 {
1488 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
1489 }
1490 [(set_attr "type" "vecstore")])
1491
1492 (define_insn "vsx_st_elemrev_v2df"
1493 [(set (match_operand:V2DF 0 "memory_operand" "=Z")
1494 (vec_select:V2DF
1495 (match_operand:V2DF 1 "vsx_register_operand" "wa")
1496 (parallel [(const_int 1) (const_int 0)])))]
1497 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
1498 "stxvd2x %x1,%y0"
1499 [(set_attr "type" "vecstore")])
1500
1501 (define_insn "vsx_st_elemrev_v2di"
1502 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1503 (vec_select:V2DI
1504 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1505 (parallel [(const_int 1) (const_int 0)])))]
1506 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
1507 "stxvd2x %x1,%y0"
1508 [(set_attr "type" "vecstore")])
1509
1510 (define_insn "vsx_st_elemrev_v4sf"
1511 [(set (match_operand:V4SF 0 "memory_operand" "=Z")
1512 (vec_select:V4SF
1513 (match_operand:V4SF 1 "vsx_register_operand" "wa")
1514 (parallel [(const_int 3) (const_int 2)
1515 (const_int 1) (const_int 0)])))]
1516 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
1517 "stxvw4x %x1,%y0"
1518 [(set_attr "type" "vecstore")])
1519
1520 (define_insn "vsx_st_elemrev_v4si"
1521 [(set (match_operand:V4SI 0 "memory_operand" "=Z")
1522 (vec_select:V4SI
1523 (match_operand:V4SI 1 "vsx_register_operand" "wa")
1524 (parallel [(const_int 3) (const_int 2)
1525 (const_int 1) (const_int 0)])))]
1526 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
1527 "stxvw4x %x1,%y0"
1528 [(set_attr "type" "vecstore")])
1529
1530 (define_expand "vsx_st_elemrev_v8hi"
1531 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1532 (vec_select:V8HI
1533 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1534 (parallel [(const_int 7) (const_int 6)
1535 (const_int 5) (const_int 4)
1536 (const_int 3) (const_int 2)
1537 (const_int 1) (const_int 0)])))]
1538 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
1539 {
1540 if (!TARGET_P9_VECTOR)
1541 {
1542 rtx mem_subreg, subreg, perm[16], pcv;
1543 rtx tmp = gen_reg_rtx (V8HImode);
1544 /* 2 is leftmost element in register.  */
1545 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
1546 int i;
1547
1548 for (i = 0; i < 16; ++i)
1549 perm[i] = GEN_INT (reorder[i]);
1550
1551 pcv = force_reg (V16QImode,
1552 gen_rtx_CONST_VECTOR (V16QImode,
1553 gen_rtvec_v (16, perm)));
1554 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
1555 operands[1], pcv));
1556 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
1557 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
1558 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1559 DONE;
1560 }
1561 })
1562
1563 (define_insn "*vsx_st_elemrev_v2di_internal"
1564 [(set (match_operand:V2DI 0 "memory_operand" "=Z")
1565 (vec_select:V2DI
1566 (match_operand:V2DI 1 "vsx_register_operand" "wa")
1567 (parallel [(const_int 1) (const_int 0)])))]
1568 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1569 "stxvd2x %x1,%y0"
1570 [(set_attr "type" "vecstore")])
1571
1572 (define_insn "*vsx_st_elemrev_v8hi_internal"
1573 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
1574 (vec_select:V8HI
1575 (match_operand:V8HI 1 "vsx_register_operand" "wa")
1576 (parallel [(const_int 7) (const_int 6)
1577 (const_int 5) (const_int 4)
1578 (const_int 3) (const_int 2)
1579 (const_int 1) (const_int 0)])))]
1580 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1581 "stxvh8x %x1,%y0"
1582 [(set_attr "type" "vecstore")])
1583
1584 (define_expand "vsx_st_elemrev_v16qi"
1585 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1586 (vec_select:V16QI
1587 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1588 (parallel [(const_int 15) (const_int 14)
1589 (const_int 13) (const_int 12)
1590 (const_int 11) (const_int 10)
1591 (const_int 9) (const_int 8)
1592 (const_int 7) (const_int 6)
1593 (const_int 5) (const_int 4)
1594 (const_int 3) (const_int 2)
1595 (const_int 1) (const_int 0)])))]
1596 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
1597 {
1598 if (!TARGET_P9_VECTOR)
1599 {
1600 rtx mem_subreg, subreg, perm[16], pcv;
1601 rtx tmp = gen_reg_rtx (V16QImode);
1602 /* 3 is leftmost element in register.  */
1603 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
1604 int i;
1605
1606 for (i = 0; i < 16; ++i)
1607 perm[i] = GEN_INT (reorder[i]);
1608
1609 pcv = force_reg (V16QImode,
1610 gen_rtx_CONST_VECTOR (V16QImode,
1611 gen_rtvec_v (16, perm)));
1612 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
1613 operands[1], pcv));
1614 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
1615 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
1616 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
1617 DONE;
1618 }
1619 })
1620
1621 (define_insn "*vsx_st_elemrev_v16qi_internal"
1622 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
1623 (vec_select:V16QI
1624 (match_operand:V16QI 1 "vsx_register_operand" "wa")
1625 (parallel [(const_int 15) (const_int 14)
1626 (const_int 13) (const_int 12)
1627 (const_int 11) (const_int 10)
1628 (const_int 9) (const_int 8)
1629 (const_int 7) (const_int 6)
1630 (const_int 5) (const_int 4)
1631 (const_int 3) (const_int 2)
1632 (const_int 1) (const_int 0)])))]
1633 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
1634 "stxvb16x %x1,%y0"
1635 [(set_attr "type" "vecstore")])
1636
1637 \f
1638 ;; VSX vector floating point arithmetic instructions. The VSX scalar
1639 ;; instructions are now combined with the insn for the traditional floating
1640 ;; point unit.
1641 (define_insn "*vsx_add<mode>3"
1642 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1643 (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1644 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1645 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1646 "xvadd<sd>p %x0,%x1,%x2"
1647 [(set_attr "type" "<VStype_simple>")])
1648
1649 (define_insn "*vsx_sub<mode>3"
1650 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1651 (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1652 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1653 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1654 "xvsub<sd>p %x0,%x1,%x2"
1655 [(set_attr "type" "<VStype_simple>")])
1656
1657 (define_insn "*vsx_mul<mode>3"
1658 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1659 (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1660 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1661 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1662 "xvmul<sd>p %x0,%x1,%x2"
1663 [(set_attr "type" "<VStype_simple>")])
1664
1665 ; Emulate vector with scalar for vec_mul in V2DImode
1666 (define_insn_and_split "vsx_mul_v2di"
1667 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1668 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1669 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1670 UNSPEC_VSX_MULSD))]
1671 "VECTOR_MEM_VSX_P (V2DImode)"
1672 "#"
1673 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1674 [(const_int 0)]
1675 {
1676 rtx op0 = operands[0];
1677 rtx op1 = operands[1];
1678 rtx op2 = operands[2];
1679
1680 if (TARGET_POWER10)
1681 emit_insn (gen_mulv2di3 (op0, op1, op2) );
1682
1683 else
1684 {
1685 rtx op3 = gen_reg_rtx (DImode);
1686 rtx op4 = gen_reg_rtx (DImode);
1687 rtx op5 = gen_reg_rtx (DImode);
1688 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1689 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1690 if (TARGET_POWERPC64)
1691 emit_insn (gen_muldi3 (op5, op3, op4));
1692 else
1693 {
1694 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1695 emit_move_insn (op5, ret);
1696 }
1697 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1698 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1699 if (TARGET_POWERPC64)
1700 emit_insn (gen_muldi3 (op3, op3, op4));
1701 else
1702 {
1703 rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
1704 emit_move_insn (op3, ret);
1705 }
1706 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1707 }
1708 DONE;
1709 }
1710 [(set_attr "type" "mul")])
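;; As a sketch of the generated code: on power10 the splitter collapses to a
;; single vmulld; on earlier CPUs it becomes two doubleword extracts per
;; input, two mulld instructions (expand_mult handles the 32-bit case), and a
;; rebuild of the vector via vsx_concat_v2di.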
1711
1712 (define_insn "*vsx_div<mode>3"
1713 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1714 (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1715 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1716 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1717 "xvdiv<sd>p %x0,%x1,%x2"
1718 [(set_attr "type" "<VStype_div>")])
1719
1720 ; Emulate vector with scalar for vec_div in V2DImode
1721 (define_insn_and_split "vsx_div_v2di"
1722 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1723 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1724 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1725 UNSPEC_VSX_DIVSD))]
1726 "VECTOR_MEM_VSX_P (V2DImode)"
1727 "#"
1728 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1729 [(const_int 0)]
1730 {
1731 rtx op0 = operands[0];
1732 rtx op1 = operands[1];
1733 rtx op2 = operands[2];
1734 rtx op3 = gen_reg_rtx (DImode);
1735 rtx op4 = gen_reg_rtx (DImode);
1736 rtx op5 = gen_reg_rtx (DImode);
1737 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1738 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1739 if (TARGET_POWERPC64)
1740 emit_insn (gen_divdi3 (op5, op3, op4));
1741 else
1742 {
1743 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1744 rtx target = emit_library_call_value (libfunc,
1745 op5, LCT_NORMAL, DImode,
1746 op3, DImode,
1747 op4, DImode);
1748 emit_move_insn (op5, target);
1749 }
1750 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1751 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1752 if (TARGET_POWERPC64)
1753 emit_insn (gen_divdi3 (op3, op3, op4));
1754 else
1755 {
1756 rtx libfunc = optab_libfunc (sdiv_optab, DImode);
1757 rtx target = emit_library_call_value (libfunc,
1758 op3, LCT_NORMAL, DImode,
1759 op3, DImode,
1760 op4, DImode);
1761 emit_move_insn (op3, target);
1762 }
1763 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1764 DONE;
1765 }
1766 [(set_attr "type" "div")])
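
;; Note for the split above: without TARGET_POWERPC64 there is no DImode
;; divide insn, so each element division goes out as a library call through
;; optab_libfunc (sdiv_optab, DImode) (i.e. libgcc's 64-bit signed divide),
;; roughly the equivalent of:
;;   result[0] = op1[0] / op2[0];
;;   result[1] = op1[1] / op2[1];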
1767
1768 (define_insn_and_split "vsx_udiv_v2di"
1769 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
1770 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
1771 (match_operand:V2DI 2 "vsx_register_operand" "wa")]
1772 UNSPEC_VSX_DIVUD))]
1773 "VECTOR_MEM_VSX_P (V2DImode)"
1774 "#"
1775 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
1776 [(const_int 0)]
1777 {
1778 rtx op0 = operands[0];
1779 rtx op1 = operands[1];
1780 rtx op2 = operands[2];
1781
1782 if (TARGET_POWER10)
1783 emit_insn (gen_udivv2di3 (op0, op1, op2));
1784 else
1785 {
1786 rtx op3 = gen_reg_rtx (DImode);
1787 rtx op4 = gen_reg_rtx (DImode);
1788 rtx op5 = gen_reg_rtx (DImode);
1789
1790 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
1791 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
1792
1793 if (TARGET_POWERPC64)
1794 emit_insn (gen_udivdi3 (op5, op3, op4));
1795 else
1796 {
1797 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1798 rtx target = emit_library_call_value (libfunc,
1799 op5, LCT_NORMAL, DImode,
1800 op3, DImode,
1801 op4, DImode);
1802 emit_move_insn (op5, target);
1803 }
1804 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
1805 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
1806
1807 if (TARGET_POWERPC64)
1808 emit_insn (gen_udivdi3 (op3, op3, op4));
1809 else
1810 {
1811 rtx libfunc = optab_libfunc (udiv_optab, DImode);
1812 rtx target = emit_library_call_value (libfunc,
1813 op3, LCT_NORMAL, DImode,
1814 op3, DImode,
1815 op4, DImode);
1816 emit_move_insn (op3, target);
1817 }
1818 emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
1819 }
1820 DONE;
1821 }
1822 [(set_attr "type" "div")])
1823
1824 ;; Vector integer signed/unsigned divide
1825 (define_insn "vsx_div_v1ti"
1826 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1827 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1828 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1829 UNSPEC_VSX_DIVSQ))]
1830 "TARGET_POWER10"
1831 "vdivsq %0,%1,%2"
1832 [(set_attr "type" "div")])
1833
1834 (define_insn "vsx_udiv_v1ti"
1835 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1836 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1837 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1838 UNSPEC_VSX_DIVUQ))]
1839 "TARGET_POWER10"
1840 "vdivuq %0,%1,%2"
1841 [(set_attr "type" "div")])
1842
1843 (define_insn "vsx_dives_v1ti"
1844 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1845 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1846 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1847 UNSPEC_VSX_DIVESQ))]
1848 "TARGET_POWER10"
1849 "vdivesq %0,%1,%2"
1850 [(set_attr "type" "div")])
1851
1852 (define_insn "vsx_diveu_v1ti"
1853 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1854 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1855 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1856 UNSPEC_VSX_DIVEUQ))]
1857 "TARGET_POWER10"
1858 "vdiveuq %0,%1,%2"
1859 [(set_attr "type" "div")])
1860
1861 (define_insn "vsx_mods_v1ti"
1862 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1863 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1864 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1865 UNSPEC_VSX_MODSQ))]
1866 "TARGET_POWER10"
1867 "vmodsq %0,%1,%2"
1868 [(set_attr "type" "div")])
1869
1870 (define_insn "vsx_modu_v1ti"
1871 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
1872 (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v")
1873 (match_operand:V1TI 2 "vsx_register_operand" "v")]
1874 UNSPEC_VSX_MODUQ))]
1875 "TARGET_POWER10"
1876 "vmoduq %0,%1,%2"
1877 [(set_attr "type" "div")])
1878
1879 ;; *tdiv* instruction returning the FG flag
1880 (define_expand "vsx_tdiv<mode>3_fg"
1881 [(set (match_dup 3)
1882 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1883 (match_operand:VSX_B 2 "vsx_register_operand")]
1884 UNSPEC_VSX_TDIV))
1885 (set (match_operand:SI 0 "gpc_reg_operand")
1886 (gt:SI (match_dup 3)
1887 (const_int 0)))]
1888 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1889 {
1890 operands[3] = gen_reg_rtx (CCFPmode);
1891 })
1892
1893 ;; *tdiv* instruction returning the FE flag
1894 (define_expand "vsx_tdiv<mode>3_fe"
1895 [(set (match_dup 3)
1896 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
1897 (match_operand:VSX_B 2 "vsx_register_operand")]
1898 UNSPEC_VSX_TDIV))
1899 (set (match_operand:SI 0 "gpc_reg_operand")
1900 (eq:SI (match_dup 3)
1901 (const_int 0)))]
1902 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1903 {
1904 operands[3] = gen_reg_rtx (CCFPmode);
1905 })
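
;; In both expanders above, the comparison of the CCFP result against zero
;; extracts a single bit of the CR field written by x<VSv>tdiv<sd>p: the
;; gt test reads the FG bit and the eq test reads the FE bit, which the
;; software-divide sequences use to decide whether the fast estimate-based
;; path is numerically safe.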
1906
1907 (define_insn "*vsx_tdiv<mode>3_internal"
1908 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
1909 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")
1910 (match_operand:VSX_B 2 "vsx_register_operand" "wa")]
1911 UNSPEC_VSX_TDIV))]
1912 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1913 "x<VSv>tdiv<sd>p %0,%x1,%x2"
1914 [(set_attr "type" "<VStype_simple>")])
1915
1916 (define_insn "vsx_fre<mode>2"
1917 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1918 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1919 UNSPEC_FRES))]
1920 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1921 "xvre<sd>p %x0,%x1"
1922 [(set_attr "type" "<VStype_simple>")])
1923
1924 (define_insn "*vsx_neg<mode>2"
1925 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1926 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1927 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1928 "xvneg<sd>p %x0,%x1"
1929 [(set_attr "type" "<VStype_simple>")])
1930
1931 (define_insn "*vsx_abs<mode>2"
1932 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1933 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1934 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1935 "xvabs<sd>p %x0,%x1"
1936 [(set_attr "type" "<VStype_simple>")])
1937
1938 (define_insn "vsx_nabs<mode>2"
1939 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1940 (neg:VSX_F
1941 (abs:VSX_F
1942 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))]
1943 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1944 "xvnabs<sd>p %x0,%x1"
1945 [(set_attr "type" "<VStype_simple>")])
1946
1947 (define_insn "vsx_smax<mode>3"
1948 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1949 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1950 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1951 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1952 "xvmax<sd>p %x0,%x1,%x2"
1953 [(set_attr "type" "<VStype_simple>")])
1954
1955 (define_insn "*vsx_smin<mode>3"
1956 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1957 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
1958 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
1959 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1960 "xvmin<sd>p %x0,%x1,%x2"
1961 [(set_attr "type" "<VStype_simple>")])
1962
1963 (define_insn "*vsx_sqrt<mode>2"
1964 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1965 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
1966 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1967 "xvsqrt<sd>p %x0,%x1"
1968 [(set_attr "type" "<sd>sqrt")])
1969
1970 (define_insn "*vsx_rsqrte<mode>2"
1971 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
1972 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
1973 UNSPEC_RSQRT))]
1974 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1975 "xvrsqrte<sd>p %x0,%x1"
1976 [(set_attr "type" "<VStype_simple>")])
1977
1978 ;; *tsqrt* returning the fg flag
1979 (define_expand "vsx_tsqrt<mode>2_fg"
1980 [(set (match_dup 2)
1981 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1982 UNSPEC_VSX_TSQRT))
1983 (set (match_operand:SI 0 "gpc_reg_operand")
1984 (gt:SI (match_dup 2)
1985 (const_int 0)))]
1986 "VECTOR_UNIT_VSX_P (<MODE>mode)"
1987 {
1988 operands[2] = gen_reg_rtx (CCFPmode);
1989 })
1990
1991 ;; *tsqrt* returning the fe flag
1992 (define_expand "vsx_tsqrt<mode>2_fe"
1993 [(set (match_dup 2)
1994 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
1995 UNSPEC_VSX_TSQRT))
1996 (set (match_operand:SI 0 "gpc_reg_operand")
1997 (eq:SI (match_dup 2)
1998 (const_int 0)))]
1999 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2000 {
2001 operands[2] = gen_reg_rtx (CCFPmode);
2002 })
2003
2004 (define_insn "*vsx_tsqrt<mode>2_internal"
2005 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
2006 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2007 UNSPEC_VSX_TSQRT))]
2008 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2009 "x<VSv>tsqrt<sd>p %0,%x1"
2010 [(set_attr "type" "<VStype_simple>")])
2011
2012 ;; Fused vector multiply/add instructions. Do not generate the Altivec versions
2013 ;; of fma (vmaddfp and vnmsubfp). These instructions allow the target to be a
2014 ;; separate register from the 3 inputs, which can possibly save an extra move
2015 ;; being generated (assuming all registers are AltiVec registers). However,
2016 ;; vmaddfp and vnmsubfp can have different behaviors than the VSX instructions
2017 ;; in some corner cases due to VSCR[NJ] being set or if the addend is +0.0
2018 ;; instead of -0.0.
2019 (define_insn "*vsx_fmav4sf4"
2020 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
2021 (fma:V4SF
2022 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
2023 (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
2024 (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))]
2025 "VECTOR_UNIT_VSX_P (V4SFmode)"
2026 "@
2027 xvmaddasp %x0,%x1,%x2
2028 xvmaddmsp %x0,%x1,%x3"
2029 [(set_attr "type" "vecfloat")])
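
;; The two alternatives above choose between the A-form and M-form FMA
;; encodings based on which input is tied to the output: when the addend
;; (operand 3) is tied we use xvmaddasp (vsT = vsA*vsB + vsT), and when
;; the second multiplicand (operand 2) is tied we use xvmaddmsp
;; (vsT = vsA*vsT + vsB), avoiding an extra register copy either way.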
2030
2031 (define_insn "*vsx_fmav2df4"
2032 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
2033 (fma:V2DF
2034 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
2035 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
2036 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))]
2037 "VECTOR_UNIT_VSX_P (V2DFmode)"
2038 "@
2039 xvmaddadp %x0,%x1,%x2
2040 xvmaddmdp %x0,%x1,%x3"
2041 [(set_attr "type" "vecdouble")])
2042
2043 (define_insn "*vsx_fms<mode>4"
2044 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
2045 (fma:VSX_F
2046 (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa")
2047 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
2048 (neg:VSX_F
2049 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
2050 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2051 "@
2052 xvmsuba<sd>p %x0,%x1,%x2
2053 xvmsubm<sd>p %x0,%x1,%x3"
2054 [(set_attr "type" "<VStype_mul>")])
2055
2056 (define_insn "*vsx_nfma<mode>4"
2057 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa")
2058 (neg:VSX_F
2059 (fma:VSX_F
2060 (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa")
2061 (match_operand:VSX_F 2 "vsx_register_operand" "wa,0")
2062 (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))]
2063 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2064 "@
2065 xvnmadda<sd>p %x0,%x1,%x2
2066 xvnmaddm<sd>p %x0,%x1,%x3"
2067 [(set_attr "type" "<VStype_mul>")])
2068
2069 (define_insn "*vsx_nfmsv4sf4"
2070 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa")
2071 (neg:V4SF
2072 (fma:V4SF
2073 (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa")
2074 (match_operand:V4SF 2 "vsx_register_operand" "wa,0")
2075 (neg:V4SF
2076 (match_operand:V4SF 3 "vsx_register_operand" "0,wa")))))]
2077 "VECTOR_UNIT_VSX_P (V4SFmode)"
2078 "@
2079 xvnmsubasp %x0,%x1,%x2
2080 xvnmsubmsp %x0,%x1,%x3"
2081 [(set_attr "type" "vecfloat")])
2082
2083 (define_insn "*vsx_nfmsv2df4"
2084 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
2085 (neg:V2DF
2086 (fma:V2DF
2087 (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa")
2088 (match_operand:V2DF 2 "vsx_register_operand" "wa,0")
2089 (neg:V2DF
2090 (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))]
2091 "VECTOR_UNIT_VSX_P (V2DFmode)"
2092 "@
2093 xvnmsubadp %x0,%x1,%x2
2094 xvnmsubmdp %x0,%x1,%x3"
2095 [(set_attr "type" "vecdouble")])
2096
2097 ;; Vector conditional expressions (no scalar version for these instructions)
2098 (define_insn "vsx_eq<mode>"
2099 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2100 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2101 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2102 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2103 "xvcmpeq<sd>p %x0,%x1,%x2"
2104 [(set_attr "type" "<VStype_simple>")])
2105
2106 (define_insn "vsx_gt<mode>"
2107 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2108 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2109 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2110 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2111 "xvcmpgt<sd>p %x0,%x1,%x2"
2112 [(set_attr "type" "<VStype_simple>")])
2113
2114 (define_insn "*vsx_ge<mode>"
2115 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2116 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2117 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
2118 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2119 "xvcmpge<sd>p %x0,%x1,%x2"
2120 [(set_attr "type" "<VStype_simple>")])
2121
2122 ;; Compare vectors producing a vector result and a predicate, setting CR6 to
2123 ;; indicate a combined status
2124 (define_insn "*vsx_eq_<mode>_p"
2125 [(set (reg:CC CR6_REGNO)
2126 (unspec:CC
2127 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2128 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2129 UNSPEC_PREDICATE))
2130 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2131 (eq:VSX_F (match_dup 1)
2132 (match_dup 2)))]
2133 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2134 "xvcmpeq<sd>p. %x0,%x1,%x2"
2135 [(set_attr "type" "<VStype_simple>")])
2136
2137 (define_insn "*vsx_gt_<mode>_p"
2138 [(set (reg:CC CR6_REGNO)
2139 (unspec:CC
2140 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2141 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2142 UNSPEC_PREDICATE))
2143 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2144 (gt:VSX_F (match_dup 1)
2145 (match_dup 2)))]
2146 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2147 "xvcmpgt<sd>p. %x0,%x1,%x2"
2148 [(set_attr "type" "<VStype_simple>")])
2149
2150 ;; xvtlsbb BF,XB
2151 ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte
2152 ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE).
2153 (define_insn "*xvtlsbb_internal"
2154 [(set (match_operand:CC 0 "cc_reg_operand" "=y")
2155 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2156 UNSPEC_XVTLSBB))]
2157 "TARGET_POWER10"
2158 "xvtlsbb %0,%x1"
2159 [(set_attr "type" "logical")])
2160
2161 ;; Vector Test Least Significant Bit by Byte
2162 ;; for the implementation of the builtin
2163 ;; __builtin_vec_test_lsbb_all_ones
2164 ;; int vec_test_lsbb_all_ones (vector unsigned char);
2165 ;; and
2166 ;; __builtin_vec_test_lsbb_all_zeros
2167 ;; int vec_test_lsbb_all_zeros (vector unsigned char);
2168 (define_expand "xvtlsbbo"
2169 [(set (match_dup 2)
2170 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2171 UNSPEC_XVTLSBB))
2172 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2173 (lt:SI (match_dup 2) (const_int 0)))]
2174 "TARGET_POWER10"
2175 {
2176 operands[2] = gen_reg_rtx (CCmode);
2177 })
2178 (define_expand "xvtlsbbz"
2179 [(set (match_dup 2)
2180 (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")]
2181 UNSPEC_XVTLSBB))
2182 (set (match_operand:SI 0 "gpc_reg_operand" "=r")
2183 (eq:SI (match_dup 2) (const_int 0)))]
2184 "TARGET_POWER10"
2185 {
2186 operands[2] = gen_reg_rtx (CCmode);
2187 })
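
;; In the two expanders above, the lt/eq comparisons read individual bits
;; of the CR field set by xvtlsbb: the LT bit is 1 when the low bit of
;; every byte is 1 (so xvtlsbbo implements vec_test_lsbb_all_ones), and
;; the EQ bit is 1 when the low bit of every byte is 0 (xvtlsbbz
;; implements vec_test_lsbb_all_zeros).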
2188
2189 (define_insn "*vsx_ge_<mode>_p"
2190 [(set (reg:CC CR6_REGNO)
2191 (unspec:CC
2192 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa")
2193 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))]
2194 UNSPEC_PREDICATE))
2195 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2196 (ge:VSX_F (match_dup 1)
2197 (match_dup 2)))]
2198 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2199 "xvcmpge<sd>p. %x0,%x1,%x2"
2200 [(set_attr "type" "<VStype_simple>")])
2201
2202 ;; Copy sign
2203 (define_insn "vsx_copysign<mode>3"
2204 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2205 (unspec:VSX_F
2206 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
2207 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
2208 UNSPEC_COPYSIGN))]
2209 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2210 "xvcpsgn<sd>p %x0,%x2,%x1"
2211 [(set_attr "type" "<VStype_simple>")])
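
;; Note the swapped source order in the template above: xvcpsgn<sd>p takes
;; the sign from its first source and the magnitude from its second, while
;; the UNSPEC_COPYSIGN convention is copysign (magnitude, sign), so
;; operand 2 supplies the sign and operand 1 the magnitude.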
2212
2213 ;; For the conversions, limit the register class for the integer value to be
2214 ;; the fprs because we don't want to add the altivec registers to movdi/movsi.
2215 ;; For the unsigned tests, there isn't a generic double -> unsigned conversion
2216 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
2217 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
2218 ;; in allowing virtual registers.
2219 (define_insn "vsx_float<VSi><mode>2"
2220 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2221 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2222 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2223 "xvcvsx<VSc><sd>p %x0,%x1"
2224 [(set_attr "type" "<VStype_simple>")])
2225
2226 (define_insn "vsx_floatuns<VSi><mode>2"
2227 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
2228 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
2229 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2230 "xvcvux<VSc><sd>p %x0,%x1"
2231 [(set_attr "type" "<VStype_simple>")])
2232
2233 (define_insn "vsx_fix_trunc<mode><VSi>2"
2234 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2235 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2236 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2237 "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
2238 [(set_attr "type" "<VStype_simple>")])
2239
2240 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
2241 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
2242 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
2243 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2244 "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
2245 [(set_attr "type" "<VStype_simple>")])
2246
2247 ;; Math rounding functions
2248 (define_insn "vsx_x<VSv>r<sd>pi"
2249 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2250 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2251 UNSPEC_VSX_ROUND_I))]
2252 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2253 "x<VSv>r<sd>pi %x0,%x1"
2254 [(set_attr "type" "<VStype_simple>")])
2255
2256 (define_insn "vsx_x<VSv>r<sd>pic"
2257 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2258 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2259 UNSPEC_VSX_ROUND_IC))]
2260 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2261 "x<VSv>r<sd>pic %x0,%x1"
2262 [(set_attr "type" "<VStype_simple>")])
2263
2264 (define_insn "vsx_btrunc<mode>2"
2265 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2266 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
2267 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2268 "xvr<sd>piz %x0,%x1"
2269 [(set_attr "type" "<VStype_simple>")])
2270
2271 (define_insn "*vsx_b2trunc<mode>2"
2272 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
2273 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
2274 UNSPEC_FRIZ))]
2275 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2276 "x<VSv>r<sd>piz %x0,%x1"
2277 [(set_attr "type" "<VStype_simple>")])
2278
2279 (define_insn "vsx_floor<mode>2"
2280 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2281 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2282 UNSPEC_FRIM))]
2283 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2284 "xvr<sd>pim %x0,%x1"
2285 [(set_attr "type" "<VStype_simple>")])
2286
2287 (define_insn "vsx_ceil<mode>2"
2288 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
2289 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
2290 UNSPEC_FRIP))]
2291 "VECTOR_UNIT_VSX_P (<MODE>mode)"
2292 "xvr<sd>pip %x0,%x1"
2293 [(set_attr "type" "<VStype_simple>")])
2294
2295 \f
2296 ;; VSX convert to/from double vector
2297
2298 ;; Convert between single and double precision
2299 ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
2300 ;; scalar single precision instructions internally use the double format.
2301 ;; Prefer the altivec registers, since we likely will need to do a vperm.
2302 (define_insn "vsx_xscvdpsp"
2303 [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
2304 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
2305 UNSPEC_VSX_CVSPDP))]
2306 "VECTOR_UNIT_VSX_P (DFmode)"
2307 "xscvdpsp %x0,%x1"
2308 [(set_attr "type" "fp")])
2309
2310 (define_insn "vsx_xvcvspdp_be"
2311 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2312 (float_extend:V2DF
2313 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2314 (parallel [(const_int 0) (const_int 2)]))))]
2315 "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
2316 "xvcvspdp %x0,%x1"
2317 [(set_attr "type" "vecdouble")])
2318
2319 (define_insn "vsx_xvcvspdp_le"
2320 [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
2321 (float_extend:V2DF
2322 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2323 (parallel [(const_int 1) (const_int 3)]))))]
2324 "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
2325 "xvcvspdp %x0,%x1"
2326 [(set_attr "type" "vecdouble")])
2327
2328 (define_expand "vsx_xvcvspdp"
2329 [(match_operand:V2DF 0 "vsx_register_operand")
2330 (match_operand:V4SF 1 "vsx_register_operand")]
2331 "VECTOR_UNIT_VSX_P (V4SFmode)"
2332 {
2333 if (BYTES_BIG_ENDIAN)
2334 emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
2335 else
2336 emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
2337 DONE;
2338 })
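
;; The _be/_le split above reflects that xvcvspdp always converts the
;; words at even positions in big endian numbering; on a little endian
;; target those register positions correspond to the elements GCC numbers
;; 1 and 3, hence the different vec_select parallels.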
2339
2340 (define_insn "vsx_xvcvdpsp"
2341 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
2342 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
2343 UNSPEC_VSX_CVSPDP))]
2344 "VECTOR_UNIT_VSX_P (V2DFmode)"
2345 "xvcvdpsp %x0,%x1"
2346 [(set_attr "type" "vecdouble")])
2347
2348 ;; xscvspdp, represent the scalar SF type as V4SF
2349 (define_insn "vsx_xscvspdp"
2350 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2351 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2352 UNSPEC_VSX_CVSPDP))]
2353 "VECTOR_UNIT_VSX_P (V4SFmode)"
2354 "xscvspdp %x0,%x1"
2355 [(set_attr "type" "fp")])
2356
2357 ;; Same as vsx_xscvspdp, but use SF as the type
2358 (define_insn "vsx_xscvspdp_scalar2"
2359 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2360 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2361 UNSPEC_VSX_CVSPDP))]
2362 "VECTOR_UNIT_VSX_P (V4SFmode)"
2363 "xscvspdp %x0,%x1"
2364 [(set_attr "type" "fp")])
2365
2366 ;; Generate xvcvhpsp instruction
2367 (define_insn "vsx_xvcvhpsp"
2368 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2369 (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
2370 UNSPEC_VSX_CVHPSP))]
2371 "TARGET_P9_VECTOR"
2372 "xvcvhpsp %x0,%x1"
2373 [(set_attr "type" "vecfloat")])
2374
2375 ;; Generate xvcvsphp
2376 (define_insn "vsx_xvcvsphp"
2377 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2378 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2379 UNSPEC_VSX_XVCVSPHP))]
2380 "TARGET_P9_VECTOR"
2381 "xvcvsphp %x0,%x1"
2382 [(set_attr "type" "vecfloat")])
2383
2384 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
2385 ;; format of scalars is actually DF.
2386 (define_insn "vsx_xscvdpsp_scalar"
2387 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2388 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2389 UNSPEC_VSX_CVSPDP))]
2390 "VECTOR_UNIT_VSX_P (V4SFmode)"
2391 "xscvdpsp %x0,%x1"
2392 [(set_attr "type" "fp")])
2393
2394 ;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
2395 (define_insn "vsx_xscvdpspn"
2396 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2397 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
2398 UNSPEC_VSX_CVDPSPN))]
2399 "TARGET_XSCVDPSPN"
2400 "xscvdpspn %x0,%x1"
2401 [(set_attr "type" "fp")])
2402
2403 (define_insn "vsx_xscvspdpn"
2404 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2405 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
2406 UNSPEC_VSX_CVSPDPN))]
2407 "TARGET_XSCVSPDPN"
2408 "xscvspdpn %x0,%x1"
2409 [(set_attr "type" "fp")])
2410
2411 (define_insn "vsx_xscvdpspn_scalar"
2412 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2413 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2414 UNSPEC_VSX_CVDPSPN))]
2415 "TARGET_XSCVDPSPN"
2416 "xscvdpspn %x0,%x1"
2417 [(set_attr "type" "fp")])
2418
2419 ;; Used by direct move to move a SFmode value from GPR to VSX register
2420 (define_insn "vsx_xscvspdpn_directmove"
2421 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
2422 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
2423 UNSPEC_VSX_CVSPDPN))]
2424 "TARGET_XSCVSPDPN"
2425 "xscvspdpn %x0,%x1"
2426 [(set_attr "type" "fp")])
2427
2428 ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)
2429
2430 (define_insn "vsx_xvcv<su>xwsp"
2431 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2432 (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
2433 "VECTOR_UNIT_VSX_P (V4SFmode)"
2434 "xvcv<su>xwsp %x0,%x1"
2435 [(set_attr "type" "vecfloat")])
2436
2437 (define_insn "vsx_xvcv<su>xddp"
2438 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2439 (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
2440 "VECTOR_UNIT_VSX_P (V2DFmode)"
2441 "xvcv<su>xddp %x0,%x1"
2442 [(set_attr "type" "vecdouble")])
2443
2444 (define_insn "vsx_xvcvsp<su>xws"
2445 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
2446 (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
2447 "VECTOR_UNIT_VSX_P (V4SFmode)"
2448 "xvcvsp<su>xws %x0,%x1"
2449 [(set_attr "type" "vecfloat")])
2450
2451 (define_insn "vsx_xvcvdp<su>xds"
2452 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
2453 (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
2454 "VECTOR_UNIT_VSX_P (V2DFmode)"
2455 "xvcvdp<su>xds %x0,%x1"
2456 [(set_attr "type" "vecdouble")])
2457
2458 (define_expand "vsx_xvcvsxddp_scale"
2459 [(match_operand:V2DF 0 "vsx_register_operand")
2460 (match_operand:V2DI 1 "vsx_register_operand")
2461 (match_operand:QI 2 "immediate_operand")]
2462 "VECTOR_UNIT_VSX_P (V2DFmode)"
2463 {
2464 rtx op0 = operands[0];
2465 rtx op1 = operands[1];
2466 int scale = INTVAL (operands[2]);
2467 emit_insn (gen_vsx_xvcvsxddp (op0, op1));
2468 if (scale != 0)
2469 rs6000_scale_v2df (op0, op0, -scale);
2470 DONE;
2471 })
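
;; The scale operand above implements the vec_ctf semantics, which return
;; (double) x * 2**-scale: after the integer-to-float conversion we
;; multiply by the power-of-2 constant 2**-scale via rs6000_scale_v2df,
;; and scale 0 needs no fixup at all.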
2472
2473 (define_expand "vsx_xvcvuxddp_scale"
2474 [(match_operand:V2DF 0 "vsx_register_operand")
2475 (match_operand:V2DI 1 "vsx_register_operand")
2476 (match_operand:QI 2 "immediate_operand")]
2477 "VECTOR_UNIT_VSX_P (V2DFmode)"
2478 {
2479 rtx op0 = operands[0];
2480 rtx op1 = operands[1];
2481 int scale = INTVAL (operands[2]);
2482 emit_insn (gen_vsx_xvcvuxddp (op0, op1));
2483 if (scale != 0)
2484 rs6000_scale_v2df (op0, op0, -scale);
2485 DONE;
2486 })
2487
2488 (define_expand "vsx_xvcvdpsxds_scale"
2489 [(match_operand:V2DI 0 "vsx_register_operand")
2490 (match_operand:V2DF 1 "vsx_register_operand")
2491 (match_operand:QI 2 "immediate_operand")]
2492 "VECTOR_UNIT_VSX_P (V2DFmode)"
2493 {
2494 rtx op0 = operands[0];
2495 rtx op1 = operands[1];
2496 rtx tmp;
2497 int scale = INTVAL (operands[2]);
2498 if (scale == 0)
2499 tmp = op1;
2500 else
2501 {
2502 tmp = gen_reg_rtx (V2DFmode);
2503 rs6000_scale_v2df (tmp, op1, scale);
2504 }
2505 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
2506 DONE;
2507 })
2508
2509 ;; convert vector of 64-bit floating point numbers to vector of
2510 ;; 64-bit unsigned integer
2511 (define_expand "vsx_xvcvdpuxds_scale"
2512 [(match_operand:V2DI 0 "vsx_register_operand")
2513 (match_operand:V2DF 1 "vsx_register_operand")
2514 (match_operand:QI 2 "immediate_operand")]
2515 "VECTOR_UNIT_VSX_P (V2DFmode)"
2516 {
2517 rtx op0 = operands[0];
2518 rtx op1 = operands[1];
2519 rtx tmp;
2520 int scale = INTVAL (operands[2]);
2521 if (scale == 0)
2522 tmp = op1;
2523 else
2524 {
2525 tmp = gen_reg_rtx (V2DFmode);
2526 rs6000_scale_v2df (tmp, op1, scale);
2527 }
2528 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
2529 DONE;
2530 })
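
;; Conversely, for vec_cts/vec_ctu the two expanders above pre-multiply
;; the input by 2**scale (note the positive scale passed to
;; rs6000_scale_v2df) before the float-to-integer conversion.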
2531
2532 ;; Convert from 64-bit to 32-bit types
2533 ;; Note, favor the Altivec registers since the usual use of these instructions
2534 ;; is in vector converts and we need to use the Altivec vperm instruction.
2535
2536 (define_insn "vsx_xvcvdpsxws"
2537 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2538 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2539 UNSPEC_VSX_CVDPSXWS))]
2540 "VECTOR_UNIT_VSX_P (V2DFmode)"
2541 "xvcvdpsxws %x0,%x1"
2542 [(set_attr "type" "vecdouble")])
2543
2544 (define_insn "vsx_xvcvdpuxws"
2545 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
2546 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
2547 UNSPEC_VSX_CVDPUXWS))]
2548 "VECTOR_UNIT_VSX_P (V2DFmode)"
2549 "xvcvdpuxws %x0,%x1"
2550 [(set_attr "type" "vecdouble")])
2551
2552 (define_insn "vsx_xvcvsxdsp"
2553 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2554 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2555 UNSPEC_VSX_CVSXDSP))]
2556 "VECTOR_UNIT_VSX_P (V2DFmode)"
2557 "xvcvsxdsp %x0,%x1"
2558 [(set_attr "type" "vecfloat")])
2559
2560 (define_insn "vsx_xvcvuxdsp"
2561 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
2562 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
2563 UNSPEC_VSX_CVUXDSP))]
2564 "VECTOR_UNIT_VSX_P (V2DFmode)"
2565 "xvcvuxdsp %x0,%x1"
2566 [(set_attr "type" "vecdouble")])
2567
2568 ;; Convert vector of 32-bit signed/unsigned integers to vector of
2569 ;; 64-bit floating point numbers.
2570 (define_insn "vsx_xvcv<su>xwdp_be"
2571 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2572 (any_float:V2DF
2573 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2574 (parallel [(const_int 0) (const_int 2)]))))]
2575 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2576 "xvcv<su>xwdp %x0,%x1"
2577 [(set_attr "type" "vecdouble")])
2578
2579 (define_insn "vsx_xvcv<su>xwdp_le"
2580 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
2581 (any_float:V2DF
2582 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
2583 (parallel [(const_int 1) (const_int 3)]))))]
2584 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2585 "xvcv<su>xwdp %x0,%x1"
2586 [(set_attr "type" "vecdouble")])
2587
2588 (define_expand "vsx_xvcv<su>xwdp"
2589 [(match_operand:V2DF 0 "vsx_register_operand")
2590 (match_operand:V4SI 1 "vsx_register_operand")
2591 (any_float (pc))]
2592 "VECTOR_UNIT_VSX_P (V2DFmode)"
2593 {
2594 if (BYTES_BIG_ENDIAN)
2595 emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
2596 else
2597 emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
2598 DONE;
2599 })
2600
2601 (define_insn "vsx_xvcvsxwdp_df"
2602 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2603 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2604 UNSPEC_VSX_CVSXWDP))]
2605 "TARGET_VSX"
2606 "xvcvsxwdp %x0,%x1"
2607 [(set_attr "type" "vecdouble")])
2608
2609 (define_insn "vsx_xvcvuxwdp_df"
2610 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
2611 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
2612 UNSPEC_VSX_CVUXWDP))]
2613 "TARGET_VSX"
2614 "xvcvuxwdp %x0,%x1"
2615 [(set_attr "type" "vecdouble")])
2616
2617 ;; Convert vector of 32-bit floating point numbers to vector of
2618 ;; 64-bit signed/unsigned integers.
2619 (define_insn "vsx_xvcvsp<su>xds_be"
2620 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2621 (any_fix:V2DI
2622 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2623 (parallel [(const_int 0) (const_int 2)]))))]
2624 "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
2625 "xvcvsp<su>xds %x0,%x1"
2626 [(set_attr "type" "vecdouble")])
2627
2628 (define_insn "vsx_xvcvsp<su>xds_le"
2629 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
2630 (any_fix:V2DI
2631 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
2632 (parallel [(const_int 1) (const_int 3)]))))]
2633 "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
2634 "xvcvsp<su>xds %x0,%x1"
2635 [(set_attr "type" "vecdouble")])
2636
2637 (define_expand "vsx_xvcvsp<su>xds"
2638 [(match_operand:V2DI 0 "vsx_register_operand")
2639 (match_operand:V4SF 1 "vsx_register_operand")
2640 (any_fix (pc))]
2641 "VECTOR_UNIT_VSX_P (V2DFmode)"
2642 {
2643 if (BYTES_BIG_ENDIAN)
2644 emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
2645 else
2646 emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
2647 DONE;
2648 })
2649
2650 ;; Generate float2 double
2651 ;; convert two double to float
2652 (define_expand "float2_v2df"
2653 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2654 (use (match_operand:V2DF 1 "register_operand" "wa"))
2655 (use (match_operand:V2DF 2 "register_operand" "wa"))]
2656 "VECTOR_UNIT_VSX_P (V4SFmode)"
2657 {
2658 rtx rtx_src1, rtx_src2, rtx_dst;
2659
2660 rtx_dst = operands[0];
2661 rtx_src1 = operands[1];
2662 rtx_src2 = operands[2];
2663
2664 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
2665 DONE;
2666 })
2667
2668 ;; Generate float2
2669 ;; convert two long long signed ints to float
2670 (define_expand "float2_v2di"
2671 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2672 (use (match_operand:V2DI 1 "register_operand" "wa"))
2673 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2674 "VECTOR_UNIT_VSX_P (V4SFmode)"
2675 {
2676 rtx rtx_src1, rtx_src2, rtx_dst;
2677
2678 rtx_dst = operands[0];
2679 rtx_src1 = operands[1];
2680 rtx_src2 = operands[2];
2681
2682 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
2683 DONE;
2684 })
2685
2686 ;; Generate uns_float2
2687 ;; convert two long long unsigned ints to float
2688 (define_expand "uns_float2_v2di"
2689 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2690 (use (match_operand:V2DI 1 "register_operand" "wa"))
2691 (use (match_operand:V2DI 2 "register_operand" "wa"))]
2692 "VECTOR_UNIT_VSX_P (V4SFmode)"
2693 {
2694 rtx rtx_src1, rtx_src2, rtx_dst;
2695
2696 rtx_dst = operands[0];
2697 rtx_src1 = operands[1];
2698 rtx_src2 = operands[2];
2699
2700 rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
2701 DONE;
2702 })
2703
2704 ;; Generate floate
2705 ;; convert double or long long signed to float
2706 ;; (Only even words are valid, BE numbering)
2707 (define_expand "floate<mode>"
2708 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2709 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2710 "VECTOR_UNIT_VSX_P (V4SFmode)"
2711 {
2712 if (BYTES_BIG_ENDIAN)
2713 {
2714 /* Shift left one word to put the even words in the correct location. */
2715 rtx rtx_tmp;
2716 rtx rtx_val = GEN_INT (4);
2717
2718 rtx_tmp = gen_reg_rtx (V4SFmode);
2719 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2720 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2721 rtx_tmp, rtx_tmp, rtx_val));
2722 }
2723 else
2724 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2725
2726 DONE;
2727 })
2728
2729 ;; Generate uns_floate
2730 ;; convert long long unsigned to float
2731 ;; (Only even words are valid, BE numbering)
2732 (define_expand "unsfloatev2di"
2733 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2734 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2735 "VECTOR_UNIT_VSX_P (V4SFmode)"
2736 {
2737 if (BYTES_BIG_ENDIAN)
2738 {
2739 /* Shift left one word to put the even words in the correct location. */
2740 rtx rtx_tmp;
2741 rtx rtx_val = GEN_INT (4);
2742
2743 rtx_tmp = gen_reg_rtx (V4SFmode);
2744 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2745 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2746 rtx_tmp, rtx_tmp, rtx_val));
2747 }
2748 else
2749 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2750
2751 DONE;
2752 })
2753
2754 ;; Generate floato
2755 ;; convert double or long long signed to float
2756 ;; (Only odd words are valid, BE numbering)
2757 (define_expand "floato<mode>"
2758 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2759 (use (match_operand:VSX_D 1 "register_operand" "wa"))]
2760 "VECTOR_UNIT_VSX_P (V4SFmode)"
2761 {
2762 if (BYTES_BIG_ENDIAN)
2763 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
2764 else
2765 {
2766 /* Shift left one word to put the odd words in the correct location. */
2767 rtx rtx_tmp;
2768 rtx rtx_val = GEN_INT (4);
2769
2770 rtx_tmp = gen_reg_rtx (V4SFmode);
2771 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
2772 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2773 rtx_tmp, rtx_tmp, rtx_val));
2774 }
2775 DONE;
2776 })
2777
2778 ;; Generate uns_floato
2779 ;; convert long long unsigned to float
2780 ;; (Only odd words are valid, BE numbering)
2781 (define_expand "unsfloatov2di"
2782 [(use (match_operand:V4SF 0 "register_operand" "=wa"))
2783 (use (match_operand:V2DI 1 "register_operand" "wa"))]
2784 "VECTOR_UNIT_VSX_P (V4SFmode)"
2785 {
2786 if (BYTES_BIG_ENDIAN)
2787 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
2788 else
2789 {
2790 /* Shift left one word to put the odd words in the correct location. */
2791 rtx rtx_tmp;
2792 rtx rtx_val = GEN_INT (4);
2793
2794 rtx_tmp = gen_reg_rtx (V4SFmode);
2795 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
2796 emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
2797 rtx_tmp, rtx_tmp, rtx_val));
2798 }
2799 DONE;
2800 })
2801
2802 ;; Generate vsigned2
2803 ;; convert two double float vectors to a vector of single precision ints
2804 (define_expand "vsigned2_v2df"
2805 [(match_operand:V4SI 0 "register_operand" "=wa")
2806 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
2807 (match_operand:V2DF 2 "register_operand" "wa")]
2808 UNSPEC_VSX_VSIGNED2)]
2809 "TARGET_VSX"
2810 {
2811 rtx rtx_src1, rtx_src2, rtx_dst;
2812 bool signed_convert = true;
2813
2814 rtx_dst = operands[0];
2815 rtx_src1 = operands[1];
2816 rtx_src2 = operands[2];
2817
2818 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2819 DONE;
2820 })
2821
2822 ;; Generate vsignedo_v2df
2823 ;; signed double float to int convert odd word
2824 (define_expand "vsignedo_v2df"
2825 [(set (match_operand:V4SI 0 "register_operand" "=wa")
2826 (match_operand:V2DF 1 "register_operand" "wa"))]
2827 "TARGET_VSX"
2828 {
2829 if (BYTES_BIG_ENDIAN)
2830 {
2831 rtx rtx_tmp;
2832 rtx rtx_val = GEN_INT (12);
2833 rtx_tmp = gen_reg_rtx (V4SImode);
2834
2835 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2836
2837 /* Big endian word numbering for words in operand is 0 1 2 3.
2838 Take (operand[1] operand[1]) and shift left one word:
2839 0 1 2 3 0 1 2 3 => 1 2 3 0
2840 Words 1 and 3 are now where they need to be for the result. */
2841
2842 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2843 rtx_tmp, rtx_val));
2844 }
2845 else
2846 /* Little endian word numbering for operand is 3 2 1 0.
2847 Result words 3 and 1 are where they need to be. */
2848 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2849
2850 DONE;
2851 }
2852 [(set_attr "type" "veccomplex")])
2853
2854 ;; Generate vsignede_v2df
2855 ;; signed double float to int even word
2856 (define_expand "vsignede_v2df"
2857 [(set (match_operand:V4SI 0 "register_operand" "=v")
2858 (match_operand:V2DF 1 "register_operand" "v"))]
2859 "TARGET_VSX"
2860 {
2861 if (BYTES_BIG_ENDIAN)
2862 /* Big endian word numbering for words in operand is 0 1.
2863 Result words 0 and 2 are where they need to be. */
2864 emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));
2865
2866 else
2867 {
2868 rtx rtx_tmp;
2869 rtx rtx_val = GEN_INT (12);
2870 rtx_tmp = gen_reg_rtx (V4SImode);
2871
2872 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));
2873
2874 /* Little endian word numbering for operand is 3 2 1 0.
2875 Take (operand[1] operand[1]) and shift left three words:
2876 0 1 2 3 0 1 2 3 => 3 0 1 2
2877 Words 0 and 2 are now where they need to be for the result. */
2878 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2879 rtx_tmp, rtx_val));
2880 }
2881 DONE;
2882 }
2883 [(set_attr "type" "veccomplex")])
2884
2885 ;; Generate vunsigned2
2886 ;; convert two double float vectors to a vector of single precision
2887 ;; unsigned ints
2888 (define_expand "vunsigned2_v2df"
2889 [(match_operand:V4SI 0 "register_operand" "=v")
2890 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
2891 (match_operand:V2DF 2 "register_operand" "v")]
2892 UNSPEC_VSX_VSIGNED2)]
2893 "TARGET_VSX"
2894 {
2895 rtx rtx_src1, rtx_src2, rtx_dst;
2896 bool signed_convert = false;
2897
2898 rtx_dst = operands[0];
2899 rtx_src1 = operands[1];
2900 rtx_src2 = operands[2];
2901
2902 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
2903 DONE;
2904 })
2905
2906 ;; Generate vunsignedo_v2df
2907 ;; unsigned double float to int convert odd word
2908 (define_expand "vunsignedo_v2df"
2909 [(set (match_operand:V4SI 0 "register_operand" "=v")
2910 (match_operand:V2DF 1 "register_operand" "v"))]
2911 "TARGET_VSX"
2912 {
2913 if (BYTES_BIG_ENDIAN)
2914 {
2915 rtx rtx_tmp;
2916 rtx rtx_val = GEN_INT (12);
2917 rtx_tmp = gen_reg_rtx (V4SImode);
2918
2919 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2920
2921 /* Big endian word numbering for words in operand is 0 1 2 3.
2922 Take (operand[1] operand[1]) and shift left one word:
2923 0 1 2 3 0 1 2 3 => 1 2 3 0
2924 Words 1 and 3 are now where they need to be for the result. */
2925
2926 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2927 rtx_tmp, rtx_val));
2928 }
2929 else
2930 /* Little endian word numbering for operand is 3 2 1 0.
2931 Result words 3 and 1 are where they need to be. */
2932 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2933
2934 DONE;
2935 }
2936 [(set_attr "type" "veccomplex")])
2937
2938 ;; Generate vunsignede_v2df
2939 ;; unsigned double float to int even word
2940 (define_expand "vunsignede_v2df"
2941 [(set (match_operand:V4SI 0 "register_operand" "=v")
2942 (match_operand:V2DF 1 "register_operand" "v"))]
2943 "TARGET_VSX"
2944 {
2945 if (BYTES_BIG_ENDIAN)
2946 /* Big endian word numbering for words in operand is 0 1.
2947 Result words 0 and 2 are where they need to be. */
2948 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));
2949
2950 else
2951 {
2952 rtx rtx_tmp;
2953 rtx rtx_val = GEN_INT (12);
2954 rtx_tmp = gen_reg_rtx (V4SImode);
2955
2956 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));
2957
2958 /* Little endian word numbering for operand is 3 2 1 0.
2959 Take (operand[1] operand[1]) and shift left three words:
2960 0 1 2 3 0 1 2 3 => 3 0 1 2
2961 Words 0 and 2 are now where they need to be for the result. */
2962 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
2963 rtx_tmp, rtx_val));
2964 }
2965 DONE;
2966 }
2967 [(set_attr "type" "veccomplex")])
2968
2969 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
2970 ;; the xvrdpiz instruction does not truncate the value if the floating
2971 ;; point value is < LONG_MIN or > LONG_MAX.
2972 (define_insn "*vsx_float_fix_v2df2"
2973 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
2974 (float:V2DF
2975 (fix:V2DI
2976 (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
2977 "TARGET_HARD_FLOAT
2978 && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
2979 && !flag_trapping_math && TARGET_FRIZ"
2980 "xvrdpiz %x0,%x1"
2981 [(set_attr "type" "vecdouble")])
2982
2983 \f
2984 ;; Permute operations
2985
2986 ;; Build a V2DF/V2DI vector from two scalars
2987 (define_insn "vsx_concat_<mode>"
2988 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
2989 (vec_concat:VSX_D
2990 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")
2991 (match_operand:<VEC_base> 2 "gpc_reg_operand" "wa,b")))]
2992 "VECTOR_MEM_VSX_P (<MODE>mode)"
2993 {
2994 if (which_alternative == 0)
2995 return (BYTES_BIG_ENDIAN
2996 ? "xxpermdi %x0,%x1,%x2,0"
2997 : "xxpermdi %x0,%x2,%x1,0");
2998
2999 else if (which_alternative == 1)
3000 return (BYTES_BIG_ENDIAN
3001 ? "mtvsrdd %x0,%1,%2"
3002 : "mtvsrdd %x0,%2,%1");
3003
3004 else
3005 gcc_unreachable ();
3006 }
3007 [(set_attr "type" "vecperm,vecmove")])
3008
3009 ;; Combiner patterns to allow creating XXPERMDI's to access either double
3010 ;; word element in a vector register.
3011 (define_insn "*vsx_concat_<mode>_1"
3012 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3013 (vec_concat:VSX_D
3014 (vec_select:<VEC_base>
3015 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
3016 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
3017 (match_operand:<VEC_base> 3 "gpc_reg_operand" "wa")))]
3018 "VECTOR_MEM_VSX_P (<MODE>mode)"
3019 {
3020 HOST_WIDE_INT dword = INTVAL (operands[2]);
3021 if (BYTES_BIG_ENDIAN)
3022 {
3023 operands[4] = GEN_INT (2 * dword);
3024 return "xxpermdi %x0,%x1,%x3,%4";
3025 }
3026 else
3027 {
3028 operands[4] = GEN_INT (!dword);
3029 return "xxpermdi %x0,%x3,%x1,%4";
3030 }
3031 }
3032 [(set_attr "type" "vecperm")])
3033
3034 (define_insn "*vsx_concat_<mode>_2"
3035 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3036 (vec_concat:VSX_D
3037 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa")
3038 (vec_select:<VEC_base>
3039 (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
3040 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
3041 "VECTOR_MEM_VSX_P (<MODE>mode)"
3042 {
3043 HOST_WIDE_INT dword = INTVAL (operands[3]);
3044 if (BYTES_BIG_ENDIAN)
3045 {
3046 operands[4] = GEN_INT (dword);
3047 return "xxpermdi %x0,%x1,%x2,%4";
3048 }
3049 else
3050 {
3051 operands[4] = GEN_INT (2 * !dword);
3052 return "xxpermdi %x0,%x2,%x1,%4";
3053 }
3054 }
3055 [(set_attr "type" "vecperm")])
3056
3057 (define_insn "*vsx_concat_<mode>_3"
3058 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3059 (vec_concat:VSX_D
3060 (vec_select:<VEC_base>
3061 (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
3062 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
3063 (vec_select:<VEC_base>
3064 (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
3065 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
3066 "VECTOR_MEM_VSX_P (<MODE>mode)"
3067 {
3068 HOST_WIDE_INT dword1 = INTVAL (operands[2]);
3069 HOST_WIDE_INT dword2 = INTVAL (operands[4]);
3070 if (BYTES_BIG_ENDIAN)
3071 {
3072 operands[5] = GEN_INT ((2 * dword1) + dword2);
3073 return "xxpermdi %x0,%x1,%x3,%5";
3074 }
3075 else
3076 {
3077 operands[5] = GEN_INT ((2 * !dword2) + !dword1);
3078 return "xxpermdi %x0,%x3,%x1,%5";
3079 }
3080 }
3081 [(set_attr "type" "vecperm")])
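
;; In the three combiner patterns above the xxpermdi immediate is formed
;; as DM = 2*sel1 + sel2, where sel1 picks the doubleword taken from the
;; first source register and sel2 the doubleword from the second; for
;; little endian the selectors are inverted (!dword) and the sources
;; swapped to compensate for the reversed GCC element order.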
3082
3083 ;; Special purpose concat using xxpermdi to glue two single precision values
3084 ;; together, relying on the fact that internally scalar floats are represented
3085 ;; as doubles. This is used to initialize a V4SF vector with 4 floats.
3086 (define_insn "vsx_concat_v2sf"
3087 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
3088 (unspec:V2DF
3089 [(match_operand:SF 1 "vsx_register_operand" "wa")
3090 (match_operand:SF 2 "vsx_register_operand" "wa")]
3091 UNSPEC_VSX_CONCAT))]
3092 "VECTOR_MEM_VSX_P (V2DFmode)"
3093 {
3094 if (BYTES_BIG_ENDIAN)
3095 return "xxpermdi %x0,%x1,%x2,0";
3096 else
3097 return "xxpermdi %x0,%x2,%x1,0";
3098 }
3099 [(set_attr "type" "vecperm")])
3100
3101 ;; Concatenate 4 SImode elements into a V4SImode reg.
3102 (define_expand "vsx_init_v4si"
3103 [(use (match_operand:V4SI 0 "gpc_reg_operand"))
3104 (use (match_operand:SI 1 "gpc_reg_operand"))
3105 (use (match_operand:SI 2 "gpc_reg_operand"))
3106 (use (match_operand:SI 3 "gpc_reg_operand"))
3107 (use (match_operand:SI 4 "gpc_reg_operand"))]
3108 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
3109 {
3110 rtx a = gen_lowpart_SUBREG (DImode, operands[1]);
3111 rtx b = gen_lowpart_SUBREG (DImode, operands[2]);
3112 rtx c = gen_lowpart_SUBREG (DImode, operands[3]);
3113 rtx d = gen_lowpart_SUBREG (DImode, operands[4]);
3114 if (!BYTES_BIG_ENDIAN)
3115 {
3116 std::swap (a, b);
3117 std::swap (c, d);
3118 }
3119
3120 rtx ab = gen_reg_rtx (DImode);
3121 rtx cd = gen_reg_rtx (DImode);
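/* Each rotate-and-insert below packs two SImode halves into one
doubleword: ab = (a << 32) | (b & 0xffffffff), and likewise cd. */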
3122 emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b,
3123 GEN_INT (0xffffffff)));
3124 emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d,
3125 GEN_INT (0xffffffff)));
3126
3127 rtx abcd = gen_reg_rtx (V2DImode);
3128 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
3129 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
3130 DONE;
3131 })
3132
3133 ;; xxpermdi for little endian loads and stores. We need several of
3134 ;; these since the form of the PARALLEL differs by mode.
3135 (define_insn "*vsx_xxpermdi2_le_<mode>"
3136 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3137 (vec_select:VSX_D
3138 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3139 (parallel [(const_int 1) (const_int 0)])))]
3140 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
3141 "xxpermdi %x0,%x1,%x1,2"
3142 [(set_attr "type" "vecperm")])
3143
3144 (define_insn "xxswapd_v16qi"
3145 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3146 (vec_select:V16QI
3147 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3148 (parallel [(const_int 8) (const_int 9)
3149 (const_int 10) (const_int 11)
3150 (const_int 12) (const_int 13)
3151 (const_int 14) (const_int 15)
3152 (const_int 0) (const_int 1)
3153 (const_int 2) (const_int 3)
3154 (const_int 4) (const_int 5)
3155 (const_int 6) (const_int 7)])))]
3156 "TARGET_VSX"
3157 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3158 ;; mnemonic xxpermdi instead.
3159 "xxpermdi %x0,%x1,%x1,2"
3160 [(set_attr "type" "vecperm")])
3161
3162 (define_insn "xxswapd_v8hi"
3163 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3164 (vec_select:V8HI
3165 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3166 (parallel [(const_int 4) (const_int 5)
3167 (const_int 6) (const_int 7)
3168 (const_int 0) (const_int 1)
3169 (const_int 2) (const_int 3)])))]
3170 "TARGET_VSX"
3171 ;; AIX does not support the extended mnemonic xxswapd. Use the basic
3172 ;; mnemonic xxpermdi instead.
3173 "xxpermdi %x0,%x1,%x1,2"
3174 [(set_attr "type" "vecperm")])
3175
3176 (define_insn "xxswapd_<mode>"
3177 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3178 (vec_select:VSX_W
3179 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3180 (parallel [(const_int 2) (const_int 3)
3181 (const_int 0) (const_int 1)])))]
3182 "TARGET_VSX"
3183 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3184 ;; mnemonic xxpermdi instead.
3185 "xxpermdi %x0,%x1,%x1,2"
3186 [(set_attr "type" "vecperm")])
3187
3188 (define_insn "xxswapd_<mode>"
3189 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3190 (vec_select:VSX_D
3191 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3192 (parallel [(const_int 1) (const_int 0)])))]
3193 "TARGET_VSX"
3194 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3195 ;; mnemonic xxpermdi instead.
3196 "xxpermdi %x0,%x1,%x1,2"
3197 [(set_attr "type" "vecperm")])
3198
3199 ;; Swap upper/lower 64-bit values in a 128-bit vector
3200 (define_insn "xxswapd_v1ti"
3201 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
3202 (subreg:V1TI
3203 (vec_select:V2DI
3204 (subreg:V2DI
3205 (match_operand:V1TI 1 "vsx_register_operand" "v") 0 )
3206 (parallel [(const_int 1)(const_int 0)]))
3207 0))]
3208 "TARGET_POWER10"
3209 ;; AIX does not support extended mnemonic xxswapd. Use the basic
3210 ;; mnemonic xxpermdi instead.
3211 "xxpermdi %x0,%x1,%x1,2"
3212 [(set_attr "type" "vecperm")])
3213
3214 (define_insn "xxgenpcvm_<mode>_internal"
3215 [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa")
3216 (unspec:VSX_EXTRACT_I4
3217 [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v")
3218 (match_operand:QI 2 "const_0_to_3_operand" "n")]
3219 UNSPEC_XXGENPCV))]
3220 "TARGET_POWER10"
3221 "xxgenpcv<wd>m %x0,%1,%2"
3222 [(set_attr "type" "vecsimple")])
3223
3224 (define_expand "xxgenpcvm_<mode>"
3225 [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand"))
3226 (use (match_operand:VSX_EXTRACT_I4 1 "register_operand"))
3227 (use (match_operand:QI 2 "immediate_operand"))]
3228 "TARGET_POWER10"
3229 {
3230 if (!BYTES_BIG_ENDIAN)
3231 {
3232 /* gen_xxgenpcvm assumes big endian element order. If little
3233 endian, swap the upper and lower double words first. */
3234 rtx tmp = gen_reg_rtx (<MODE>mode);
3235
3236 emit_insn (gen_xxswapd_<mode> (tmp, operands[1]));
3237 operands[1] = tmp;
3238 }
3239 emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1],
3240 operands[2]));
3241 DONE;
3242 })
3243
3244 ;; lxvd2x for little endian loads. We need several of
3245 ;; these since the form of the PARALLEL differs by mode.
3246 (define_insn "*vsx_lxvd2x2_le_<mode>"
3247 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3248 (vec_select:VSX_D
3249 (match_operand:VSX_D 1 "memory_operand" "Z")
3250 (parallel [(const_int 1) (const_int 0)])))]
3251 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3252 "lxvd2x %x0,%y1"
3253 [(set_attr "type" "vecload")])
3254
3255 (define_insn "*vsx_lxvd2x4_le_<mode>"
3256 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
3257 (vec_select:VSX_W
3258 (match_operand:VSX_W 1 "memory_operand" "Z")
3259 (parallel [(const_int 2) (const_int 3)
3260 (const_int 0) (const_int 1)])))]
3261 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3262 "lxvd2x %x0,%y1"
3263 [(set_attr "type" "vecload")])
3264
3265 (define_insn "*vsx_lxvd2x8_le_V8HI"
3266 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
3267 (vec_select:V8HI
3268 (match_operand:V8HI 1 "memory_operand" "Z")
3269 (parallel [(const_int 4) (const_int 5)
3270 (const_int 6) (const_int 7)
3271 (const_int 0) (const_int 1)
3272 (const_int 2) (const_int 3)])))]
3273 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3274 "lxvd2x %x0,%y1"
3275 [(set_attr "type" "vecload")])
3276
3277 (define_insn "*vsx_lxvd2x16_le_V16QI"
3278 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
3279 (vec_select:V16QI
3280 (match_operand:V16QI 1 "memory_operand" "Z")
3281 (parallel [(const_int 8) (const_int 9)
3282 (const_int 10) (const_int 11)
3283 (const_int 12) (const_int 13)
3284 (const_int 14) (const_int 15)
3285 (const_int 0) (const_int 1)
3286 (const_int 2) (const_int 3)
3287 (const_int 4) (const_int 5)
3288 (const_int 6) (const_int 7)])))]
3289 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3290 "lxvd2x %x0,%y1"
3291 [(set_attr "type" "vecload")])
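
;; These patterns model the doubleword swap that lxvd2x performs on a
;; little endian memory image as an explicit vec_select, so the rest of
;; the compiler sees a register in GCC element order. They are gated on
;; !TARGET_P9_VECTOR because the ISA 3.0 lxvx/stxvx instructions load and
;; store in element order directly, leaving no swap to model.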
3292
3293 ;; stxvd2x for little endian stores. We need several of
3294 ;; these since the form of the PARALLEL differs by mode.
3295 (define_insn "*vsx_stxvd2x2_le_<mode>"
3296 [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
3297 (vec_select:VSX_D
3298 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3299 (parallel [(const_int 1) (const_int 0)])))]
3300 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3301 "stxvd2x %x1,%y0"
3302 [(set_attr "type" "vecstore")])
3303
3304 (define_insn "*vsx_stxvd2x4_le_<mode>"
3305 [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
3306 (vec_select:VSX_W
3307 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
3308 (parallel [(const_int 2) (const_int 3)
3309 (const_int 0) (const_int 1)])))]
3310 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
3311 "stxvd2x %x1,%y0"
3312 [(set_attr "type" "vecstore")])
3313
3314 (define_insn "*vsx_stxvd2x8_le_V8HI"
3315 [(set (match_operand:V8HI 0 "memory_operand" "=Z")
3316 (vec_select:V8HI
3317 (match_operand:V8HI 1 "vsx_register_operand" "wa")
3318 (parallel [(const_int 4) (const_int 5)
3319 (const_int 6) (const_int 7)
3320 (const_int 0) (const_int 1)
3321 (const_int 2) (const_int 3)])))]
3322 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
3323 "stxvd2x %x1,%y0"
3324 [(set_attr "type" "vecstore")])
3325
3326 (define_insn "*vsx_stxvd2x16_le_V16QI"
3327 [(set (match_operand:V16QI 0 "memory_operand" "=Z")
3328 (vec_select:V16QI
3329 (match_operand:V16QI 1 "vsx_register_operand" "wa")
3330 (parallel [(const_int 8) (const_int 9)
3331 (const_int 10) (const_int 11)
3332 (const_int 12) (const_int 13)
3333 (const_int 14) (const_int 15)
3334 (const_int 0) (const_int 1)
3335 (const_int 2) (const_int 3)
3336 (const_int 4) (const_int 5)
3337 (const_int 6) (const_int 7)])))]
3338 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
3339 "stxvd2x %x1,%y0"
3340 [(set_attr "type" "vecstore")])
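;; A note on the LE load/store patterns above: lxvd2x and stxvd2x transfer
;; the two doublewords in big endian order regardless of the processor's
;; endian mode, which is why each pattern wraps the access in a
;; doubleword-reversing vec_select.  A hedged C model of the V2DI load
;; (illustrative only):
;;
;;   void le_lxvd2x_v2di (unsigned long long reg[2],
;;                        const unsigned long long mem[2])
;;   {
;;     /* In LE element numbering the two halves arrive swapped; the
;;        PARALLEL [1 0] in the pattern describes exactly this.  */
;;     reg[0] = mem[1];
;;     reg[1] = mem[0];
;;   }
;;
;; The narrower modes follow the same scheme: the PARALLEL exchanges the two
;; doubleword halves while preserving element order within each half.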
3341
3342 ;; Convert a TImode value into V1TImode
3343 (define_expand "vsx_set_v1ti"
3344 [(match_operand:V1TI 0 "nonimmediate_operand")
3345 (match_operand:V1TI 1 "nonimmediate_operand")
3346 (match_operand:TI 2 "input_operand")
3347 (match_operand:QI 3 "u5bit_cint_operand")]
3348 "VECTOR_MEM_VSX_P (V1TImode)"
3349 {
3350 if (operands[3] != const0_rtx)
3351 gcc_unreachable ();
3352
3353 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[2]));
3354 DONE;
3355 })
3356
3357 ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
3358 (define_expand "vsx_set_<mode>"
3359 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
3360 (use (match_operand:VSX_D 1 "vsx_register_operand"))
3361 (use (match_operand:<VEC_base> 2 "gpc_reg_operand"))
3362 (use (match_operand:QI 3 "const_0_to_1_operand"))]
3363 "VECTOR_MEM_VSX_P (<MODE>mode)"
3364 {
3365 rtx dest = operands[0];
3366 rtx vec_reg = operands[1];
3367 rtx value = operands[2];
3368 rtx ele = operands[3];
3369 rtx tmp = gen_reg_rtx (<VEC_base>mode);
3370
3371 if (ele == const0_rtx)
3372 {
3373 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
3374 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
3375 DONE;
3376 }
3377 else if (ele == const1_rtx)
3378 {
3379 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
3380 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
3381 DONE;
3382 }
3383 else
3384 gcc_unreachable ();
3385 })
3386
3387 ;; Extract a DF/DI element from V2DF/V2DI.
3388 ;; Optimize cases where we can do a simple or direct move,
3389 ;; or see if we can avoid doing the move at all.
3390
3391 (define_expand "vsx_extract_<mode>"
3392 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
3393 (vec_select:<VEC_base>
3394 (match_operand:VSX_D 1 "gpc_reg_operand")
3395 (parallel
3396 [(match_operand:QI 2 "const_0_to_1_operand")])))]
3397 "VECTOR_MEM_VSX_P (<MODE>mode)"
3398 "")
3399
3400 (define_insn "*vsx_extract_<mode>_0"
3401 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wa,wr")
3402 (vec_select:<VEC_base>
3403 (match_operand:VSX_D 1 "gpc_reg_operand" "0,wa,wa")
3404 (parallel
3405 [(match_operand:QI 2 "const_0_to_1_operand" "n,n,n")])))]
3406 "VECTOR_MEM_VSX_P (<MODE>mode)
3407 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
3408 {
3409 if (which_alternative == 0)
3410 return ASM_COMMENT_START " vec_extract to same register";
3411
3412 if (which_alternative == 2)
3413 return "mfvsrd %0,%x1";
3414
3415 return "xxlor %x0,%x1,%x1";
3416 }
3417 [(set_attr "type" "*,veclogical,mfvsr")
3418 (set_attr "isa" "*,*,p8v")
3419 (set_attr "length" "0,*,*")])
3420
3421 (define_insn "*vsx_extract_<mode>_1"
3422 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wr")
3423 (vec_select:<VEC_base>
3424 (match_operand:VSX_D 1 "gpc_reg_operand" "wa,wa")
3425 (parallel
3426 [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))]
3427 "VECTOR_MEM_VSX_P (<MODE>mode)
3428 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 0)"
3429 {
3430 if (which_alternative == 1)
3431 return "mfvsrld %0,%x1";
3432
3433 operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 2 : 3);
3434 return "xxpermdi %x0,%x1,%x1,%3";
3435 }
3436 [(set_attr "type" "mfvsr,vecperm")
3437 (set_attr "isa" "*,p9v")])
3438
3439 ;; Optimize extracting a single scalar element from memory.
3440 (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
3441 [(set (match_operand:<VSX_D:VEC_base> 0 "register_operand" "=wa,wr")
3442 (vec_select:<VSX_D:VEC_base>
3443 (match_operand:VSX_D 1 "memory_operand" "m,m")
3444 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
3445 (clobber (match_scratch:P 3 "=&b,&b"))]
3446 "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
3447 "#"
3448 "&& reload_completed"
3449 [(set (match_dup 0) (match_dup 4))]
3450 {
3451 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3452 operands[3], <VSX_D:VEC_base>mode);
3453 }
3454 [(set_attr "type" "fpload,load")
3455 (set_attr "length" "8")])
3456
3457 ;; Optimize storing to memory a single scalar element that is already in
3458 ;; the right location.
3459 (define_insn "*vsx_extract_<mode>_store"
3460 [(set (match_operand:<VEC_base> 0 "memory_operand" "=m,Z,wY")
3461 (vec_select:<VEC_base>
3462 (match_operand:VSX_D 1 "register_operand" "d,v,v")
3463 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "n,n,n")])))]
3464 "VECTOR_MEM_VSX_P (<MODE>mode)
3465 && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)"
3466 "@
3467 stfd%U0%X0 %1,%0
3468 stxsdx %x1,%y0
3469 stxsd %1,%0"
3470 [(set_attr "type" "fpstore")
3471 (set_attr "isa" "*,p7v,p9v")])
3472
3473 ;; Variable V2DI/V2DF extract shift
3474 (define_insn "vsx_vslo_<mode>"
3475 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
3476 (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3477 (match_operand:V2DI 2 "gpc_reg_operand" "v")]
3478 UNSPEC_VSX_VSLO))]
3479 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3480 "vslo %0,%1,%2"
3481 [(set_attr "type" "vecperm")])
3482
3483 ;; Variable V2DI/V2DF extract from a register
3484 (define_insn_and_split "vsx_extract_<mode>_var"
3485 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v")
3486 (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
3487 (match_operand:DI 2 "gpc_reg_operand" "r")]
3488 UNSPEC_VSX_EXTRACT))
3489 (clobber (match_scratch:DI 3 "=r"))
3490 (clobber (match_scratch:V2DI 4 "=&v"))]
3491 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3492 "#"
3493 "&& reload_completed"
3494 [(const_int 0)]
3495 {
3496 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3497 operands[3], operands[4]);
3498 DONE;
3499 })
3500
3501 ;; Variable V2DI/V2DF extract from memory
3502 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3503 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,r")
3504 (unspec:<VEC_base> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
3505 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3506 UNSPEC_VSX_EXTRACT))
3507 (clobber (match_scratch:DI 3 "=&b,&b"))]
3508 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3509 "#"
3510 "&& reload_completed"
3511 [(set (match_dup 0) (match_dup 4))]
3512 {
3513 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3514 operands[3], <VEC_base>mode);
3515 }
3516 [(set_attr "type" "fpload,load")])
3517
3518 ;; Extract a SF element from V4SF
3519 (define_insn_and_split "vsx_extract_v4sf"
3520 [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
3521 (vec_select:SF
3522 (match_operand:V4SF 1 "vsx_register_operand" "wa")
3523 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
3524 (clobber (match_scratch:V4SF 3 "=0"))]
3525 "VECTOR_UNIT_VSX_P (V4SFmode)"
3526 "#"
3527 "&& 1"
3528 [(const_int 0)]
3529 {
3530 rtx op0 = operands[0];
3531 rtx op1 = operands[1];
3532 rtx op2 = operands[2];
3533 rtx op3 = operands[3];
3534 rtx tmp;
3535 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);
3536
3537 if (ele == 0)
3538 tmp = op1;
3539 else
3540 {
3541 if (GET_CODE (op3) == SCRATCH)
3542 op3 = gen_reg_rtx (V4SFmode);
3543 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
3544 tmp = op3;
3545 }
3546 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
3547 DONE;
3548 }
3549 [(set_attr "length" "8")
3550 (set_attr "type" "fp")])
3551
3552 (define_insn_and_split "*vsx_extract_v4sf_load"
3553 [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
3554 (vec_select:SF
3555 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
3556 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
3557 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
3558 "VECTOR_MEM_VSX_P (V4SFmode)"
3559 "#"
3560 "&& reload_completed"
3561 [(set (match_dup 0) (match_dup 4))]
3562 {
3563 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3564 operands[3], SFmode);
3565 }
3566 [(set_attr "type" "fpload,fpload,fpload,load")
3567 (set_attr "length" "8")
3568 (set_attr "isa" "*,p7v,p9v,*")])
3569
3570 ;; Variable V4SF extract from a register
3571 (define_insn_and_split "vsx_extract_v4sf_var"
3572 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
3573 (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
3574 (match_operand:DI 2 "gpc_reg_operand" "r")]
3575 UNSPEC_VSX_EXTRACT))
3576 (clobber (match_scratch:DI 3 "=r"))
3577 (clobber (match_scratch:V2DI 4 "=&v"))]
3578 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3579 "#"
3580 "&& reload_completed"
3581 [(const_int 0)]
3582 {
3583 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3584 operands[3], operands[4]);
3585 DONE;
3586 })
3587
3588 ;; Variable V4SF extract from memory
3589 (define_insn_and_split "*vsx_extract_v4sf_var_load"
3590 [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r")
3591 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
3592 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3593 UNSPEC_VSX_EXTRACT))
3594 (clobber (match_scratch:DI 3 "=&b,&b"))]
3595 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
3596 "#"
3597 "&& reload_completed"
3598 [(set (match_dup 0) (match_dup 4))]
3599 {
3600 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3601 operands[3], SFmode);
3602 }
3603 [(set_attr "type" "fpload,load")])
3604
3605 ;; Expand the builtin form of xxpermdi to canonical rtl.
3606 (define_expand "vsx_xxpermdi_<mode>"
3607 [(match_operand:VSX_L 0 "vsx_register_operand")
3608 (match_operand:VSX_L 1 "vsx_register_operand")
3609 (match_operand:VSX_L 2 "vsx_register_operand")
3610 (match_operand:QI 3 "u5bit_cint_operand")]
3611 "VECTOR_MEM_VSX_P (<MODE>mode)"
3612 {
3613 rtx target = operands[0];
3614 rtx op0 = operands[1];
3615 rtx op1 = operands[2];
3616 int mask = INTVAL (operands[3]);
3617 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3618 rtx perm1 = GEN_INT ((mask & 1) + 2);
3619 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3620
3621 if (<MODE>mode == V2DFmode)
3622 gen = gen_vsx_xxpermdi2_v2df_1;
3623 else
3624 {
3625 gen = gen_vsx_xxpermdi2_v2di_1;
3626 if (<MODE>mode != V2DImode)
3627 {
3628 target = gen_lowpart (V2DImode, target);
3629 op0 = gen_lowpart (V2DImode, op0);
3630 op1 = gen_lowpart (V2DImode, op1);
3631 }
3632 }
3633 emit_insn (gen (target, op0, op1, perm0, perm1));
3634 DONE;
3635 })
3636
3637 ;; Special version of xxpermdi that retains big-endian semantics.
3638 (define_expand "vsx_xxpermdi_<mode>_be"
3639 [(match_operand:VSX_L 0 "vsx_register_operand")
3640 (match_operand:VSX_L 1 "vsx_register_operand")
3641 (match_operand:VSX_L 2 "vsx_register_operand")
3642 (match_operand:QI 3 "u5bit_cint_operand")]
3643 "VECTOR_MEM_VSX_P (<MODE>mode)"
3644 {
3645 rtx target = operands[0];
3646 rtx op0 = operands[1];
3647 rtx op1 = operands[2];
3648 int mask = INTVAL (operands[3]);
3649 rtx perm0 = GEN_INT ((mask >> 1) & 1);
3650 rtx perm1 = GEN_INT ((mask & 1) + 2);
3651 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
3652
3653 if (<MODE>mode == V2DFmode)
3654 gen = gen_vsx_xxpermdi2_v2df_1;
3655 else
3656 {
3657 gen = gen_vsx_xxpermdi2_v2di_1;
3658 if (<MODE>mode != V2DImode)
3659 {
3660 target = gen_lowpart (V2DImode, target);
3661 op0 = gen_lowpart (V2DImode, op0);
3662 op1 = gen_lowpart (V2DImode, op1);
3663 }
3664 }
3665 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
3666 transformation we don't want; it is necessary for
3667 rs6000_expand_vec_perm_const_1 but not for this use. So we
3668 prepare for that by reversing the transformation here. */
3669 if (BYTES_BIG_ENDIAN)
3670 emit_insn (gen (target, op0, op1, perm0, perm1));
3671 else
3672 {
3673 rtx p0 = GEN_INT (3 - INTVAL (perm1));
3674 rtx p1 = GEN_INT (3 - INTVAL (perm0));
3675 emit_insn (gen (target, op1, op0, p0, p1));
3676 }
3677 DONE;
3678 })
3679
3680 (define_insn "vsx_xxpermdi2_<mode>_1"
3681 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
3682 (vec_select:VSX_D
3683 (vec_concat:<VS_double>
3684 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
3685 (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
3686 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3687 (match_operand 4 "const_2_to_3_operand" "")])))]
3688 "VECTOR_MEM_VSX_P (<MODE>mode)"
3689 {
3690 int op3, op4, mask;
3691
3692 /* For little endian, swap operands and invert/swap selectors
3693 to get the correct xxpermdi. The operand swap sets up the
3694 inputs as a little endian array. The selectors are swapped
3695 because they are defined to use big endian ordering. The
3696 selectors are inverted to get the correct doublewords for
3697 little endian ordering. */
3698 if (BYTES_BIG_ENDIAN)
3699 {
3700 op3 = INTVAL (operands[3]);
3701 op4 = INTVAL (operands[4]);
3702 }
3703 else
3704 {
3705 op3 = 3 - INTVAL (operands[4]);
3706 op4 = 3 - INTVAL (operands[3]);
3707 }
3708
3709 mask = (op3 << 1) | (op4 - 2);
3710 operands[3] = GEN_INT (mask);
3711
3712 if (BYTES_BIG_ENDIAN)
3713 return "xxpermdi %x0,%x1,%x2,%3";
3714 else
3715 return "xxpermdi %x0,%x2,%x1,%3";
3716 }
3717 [(set_attr "type" "vecperm")])
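;; A minimal host-side sketch (illustrative C, not GCC source) of the mask
;; computation in vsx_xxpermdi2_<mode>_1 above, with sel0 in {0,1} and sel1
;; in {2,3} standing in for operands[3] and operands[4]:
;;
;;   int xxpermdi_mask (int sel0, int sel1, int big_endian)
;;   {
;;     if (!big_endian)
;;       {
;;         /* Swap and invert the selectors, as the insn body does; the
;;            asm output also swaps the two input registers.  */
;;         int t = 3 - sel1;
;;         sel1 = 3 - sel0;
;;         sel0 = t;
;;       }
;;     return (sel0 << 1) | (sel1 - 2);  /* 2-bit xxpermdi DM field */
;;   }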
3718
3719 ;; Extraction of a single element from a small integer vector.  Until ISA 3.0,
3720 ;; none of the small types were allowed in a vector register, so we had to
3721 ;; extract to DImode and either do a direct move or store.
3722 (define_expand "vsx_extract_<mode>"
3723 [(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand")
3724 (vec_select:<VEC_base>
3725 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
3726 (parallel [(match_operand:QI 2 "const_int_operand")])))
3727 (clobber (match_scratch:VSX_EXTRACT_I 3))])]
3728 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3729 {
3730 /* If we have ISA 3.0, we can do an xxextractuw/vextractu{b,h}.  */
3731 if (TARGET_P9_VECTOR)
3732 {
3733 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
3734 operands[2]));
3735 DONE;
3736 }
3737 })
3738
3739 (define_insn "vsx_extract_<mode>_p9"
3740 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>")
3741 (vec_select:<VEC_base>
3742 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3743 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
3744 (clobber (match_scratch:SI 3 "=r,X"))]
3745 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3746 {
3747 if (which_alternative == 0)
3748 return "#";
3749
3750 else
3751 {
3752 HOST_WIDE_INT elt = INTVAL (operands[2]);
3753 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
3754 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
3755 : elt);
3756
3757 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
3758 HOST_WIDE_INT offset = unit_size * elt_adj;
3759
3760 operands[2] = GEN_INT (offset);
3761 if (unit_size == 4)
3762 return "xxextractuw %x0,%x1,%2";
3763 else
3764 return "vextractu<wd> %0,%1,%2";
3765 }
3766 }
3767 [(set_attr "type" "vecsimple")
3768 (set_attr "isa" "p9v,*")])
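;; A hedged sketch of the byte-offset computation above (illustrative C
;; only; the function name is made up):
;;
;;   int vextractu_byte_offset (int elt, int nunits, int unit_size,
;;                              int big_endian)
;;   {
;;     int elt_adj = big_endian ? elt : nunits - 1 - elt;
;;     return unit_size * elt_adj;  /* immediate for the extract insn */
;;   }
;;
;; For example, extracting element 2 of a V8HI on LE uses byte offset
;; 2 * (8 - 1 - 2) = 10.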
3769
3770 (define_split
3771 [(set (match_operand:<VEC_base> 0 "int_reg_operand")
3772 (vec_select:<VEC_base>
3773 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
3774 (parallel [(match_operand:QI 2 "const_int_operand")])))
3775 (clobber (match_operand:SI 3 "int_reg_operand"))]
3776 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
3777 [(const_int 0)]
3778 {
3779 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
3780 rtx op1 = operands[1];
3781 rtx op2 = operands[2];
3782 rtx op3 = operands[3];
3783 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);
3784
3785 emit_move_insn (op3, GEN_INT (offset));
3786 if (BYTES_BIG_ENDIAN)
3787 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
3788 else
3789 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
3790 DONE;
3791 })
3792
3793 ;; Optimize zero extracts to eliminate the AND after the extract.
3794 (define_insn_and_split "*vsx_extract_<mode>_di_p9"
3795 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
3796 (zero_extend:DI
3797 (vec_select:<VEC_base>
3798 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
3799 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
3800 (clobber (match_scratch:SI 3 "=r,X"))]
3801 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3802 "#"
3803 "&& reload_completed"
3804 [(parallel [(set (match_dup 4)
3805 (vec_select:<VEC_base>
3806 (match_dup 1)
3807 (parallel [(match_dup 2)])))
3808 (clobber (match_dup 3))])]
3809 {
3810 operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0]));
3811 }
3812 [(set_attr "isa" "p9v,*")])
3813
3814 ;; Optimize stores to use the ISA 3.0 scalar store instructions
3815 (define_insn_and_split "*vsx_extract_<mode>_store_p9"
3816 [(set (match_operand:<VEC_base> 0 "memory_operand" "=Z,m")
3817 (vec_select:<VEC_base>
3818 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
3819 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
3820 (clobber (match_scratch:<VEC_base> 3 "=<VSX_EX>,&*r"))
3821 (clobber (match_scratch:SI 4 "=X,&r"))]
3822 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
3823 "#"
3824 "&& reload_completed"
3825 [(parallel [(set (match_dup 3)
3826 (vec_select:<VEC_base>
3827 (match_dup 1)
3828 (parallel [(match_dup 2)])))
3829 (clobber (match_dup 4))])
3830 (set (match_dup 0)
3831 (match_dup 3))])
3832
3833 (define_insn_and_split "*vsx_extract_si"
3834 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
3835 (vec_select:SI
3836 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
3837 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
3838 (clobber (match_scratch:V4SI 3 "=v,v,v"))]
3839 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
3840 "#"
3841 "&& reload_completed"
3842 [(const_int 0)]
3843 {
3844 rtx dest = operands[0];
3845 rtx src = operands[1];
3846 rtx element = operands[2];
3847 rtx vec_tmp = operands[3];
3848 int value;
3849
3850 /* Adjust the index for LE element ordering; the minuend 3 below is
3851 GET_MODE_NUNITS (V4SImode) - 1.  */
3852 if (!BYTES_BIG_ENDIAN)
3853 element = GEN_INT (3 - INTVAL (element));
3854
3855 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3856 instruction. */
3857 value = INTVAL (element);
3858 if (value != 1)
3859 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
3860 else
3861 vec_tmp = src;
3862
3863 if (MEM_P (operands[0]))
3864 {
3865 if (can_create_pseudo_p ())
3866 dest = rs6000_force_indexed_or_indirect_mem (dest);
3867
3868 if (TARGET_P8_VECTOR)
3869 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3870 else
3871 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
3872 }
3873
3874 else if (TARGET_P8_VECTOR)
3875 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
3876 else
3877 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3878 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3879
3880 DONE;
3881 }
3882 [(set_attr "type" "mfvsr,vecperm,fpstore")
3883 (set_attr "length" "8")
3884 (set_attr "isa" "*,p8v,*")])
3885
3886 (define_insn_and_split "*vsx_extract_<mode>_p8"
3887 [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r")
3888 (vec_select:<VEC_base>
3889 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
3890 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3891 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
3892 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
3893 && !TARGET_P9_VECTOR"
3894 "#"
3895 "&& reload_completed"
3896 [(const_int 0)]
3897 {
3898 rtx dest = operands[0];
3899 rtx src = operands[1];
3900 rtx element = operands[2];
3901 rtx vec_tmp = operands[3];
3902 int value;
3903
3904 if (!BYTES_BIG_ENDIAN)
3905 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));
3906
3907 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
3908 instruction. */
3909 value = INTVAL (element);
3910 if (<MODE>mode == V16QImode)
3911 {
3912 if (value != 7)
3913 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
3914 else
3915 vec_tmp = src;
3916 }
3917 else if (<MODE>mode == V8HImode)
3918 {
3919 if (value != 3)
3920 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
3921 else
3922 vec_tmp = src;
3923 }
3924 else
3925 gcc_unreachable ();
3926
3927 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
3928 gen_rtx_REG (DImode, REGNO (vec_tmp)));
3929 DONE;
3930 }
3931 [(set_attr "type" "mfvsr")])
3932
3933 ;; Optimize extracting a single scalar element from memory.
3934 (define_insn_and_split "*vsx_extract_<mode>_load"
3935 [(set (match_operand:<VEC_base> 0 "register_operand" "=r")
3936 (vec_select:<VEC_base>
3937 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
3938 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
3939 (clobber (match_scratch:DI 3 "=&b"))]
3940 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3941 "#"
3942 "&& reload_completed"
3943 [(set (match_dup 0) (match_dup 4))]
3944 {
3945 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3946 operands[3], <VEC_base>mode);
3947 }
3948 [(set_attr "type" "load")
3949 (set_attr "length" "8")])
3950
3951 ;; Variable V16QI/V8HI/V4SI extract from a register
3952 (define_insn_and_split "vsx_extract_<mode>_var"
3953 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,r")
3954 (unspec:<VEC_base>
3955 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
3956 (match_operand:DI 2 "gpc_reg_operand" "r,r")]
3957 UNSPEC_VSX_EXTRACT))
3958 (clobber (match_scratch:DI 3 "=r,r"))
3959 (clobber (match_scratch:V2DI 4 "=X,&v"))]
3960 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3961 "#"
3962 "&& reload_completed"
3963 [(const_int 0)]
3964 {
3965 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
3966 operands[3], operands[4]);
3967 DONE;
3968 }
3969 [(set_attr "isa" "p9v,*")])
3970
3971 ;; Variable V16QI/V8HI/V4SI extract from memory
3972 (define_insn_and_split "*vsx_extract_<mode>_var_load"
3973 [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r")
3974 (unspec:<VEC_base>
3975 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
3976 (match_operand:DI 2 "gpc_reg_operand" "r")]
3977 UNSPEC_VSX_EXTRACT))
3978 (clobber (match_scratch:DI 3 "=&b"))]
3979 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
3980 "#"
3981 "&& reload_completed"
3982 [(set (match_dup 0) (match_dup 4))]
3983 {
3984 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
3985 operands[3], <VEC_base>mode);
3986 }
3987 [(set_attr "type" "load")])
3988
3989 ;; ISA 3.1 extract
3990 (define_expand "vextractl<mode>"
3991 [(set (match_operand:V2DI 0 "altivec_register_operand")
3992 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
3993 (match_operand:VI2 2 "altivec_register_operand")
3994 (match_operand:SI 3 "register_operand")]
3995 UNSPEC_EXTRACTL))]
3996 "TARGET_POWER10"
3997 {
3998 if (BYTES_BIG_ENDIAN)
3999 {
4000 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1],
4001 operands[2], operands[3]));
4002 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4003 }
4004 else
4005 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2],
4006 operands[1], operands[3]));
4007 DONE;
4008 })
4009
4010 (define_insn "vextractl<mode>_internal"
4011 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4012 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4013 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4014 (match_operand:SI 3 "register_operand" "r")]
4015 UNSPEC_EXTRACTL))]
4016 "TARGET_POWER10"
4017 "vext<du_or_d><wd>vlx %0,%1,%2,%3"
4018 [(set_attr "type" "vecsimple")])
4019
4020 (define_expand "vextractr<mode>"
4021 [(set (match_operand:V2DI 0 "altivec_register_operand")
4022 (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand")
4023 (match_operand:VI2 2 "altivec_register_operand")
4024 (match_operand:SI 3 "register_operand")]
4025 UNSPEC_EXTRACTR))]
4026 "TARGET_POWER10"
4027 {
4028 if (BYTES_BIG_ENDIAN)
4029 {
4030 emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1],
4031 operands[2], operands[3]));
4032 emit_insn (gen_xxswapd_v2di (operands[0], operands[0]));
4033 }
4034 else
4035 emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2],
4036 operands[1], operands[3]));
4037 DONE;
4038 })
4039
4040 (define_insn "vextractr<mode>_internal"
4041 [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
4042 (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v")
4043 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4044 (match_operand:SI 3 "register_operand" "r")]
4045 UNSPEC_EXTRACTR))]
4046 "TARGET_POWER10"
4047 "vext<du_or_d><wd>vrx %0,%1,%2,%3"
4048 [(set_attr "type" "vecsimple")])
4049
4050 (define_expand "vinsertvl_<mode>"
4051 [(set (match_operand:VI2 0 "altivec_register_operand")
4052 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4053 (match_operand:VI2 2 "altivec_register_operand")
4054 (match_operand:SI 3 "register_operand" "r")]
4055 UNSPEC_INSERTL))]
4056 "TARGET_POWER10"
4057 {
4058 if (BYTES_BIG_ENDIAN)
4059 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4060 operands[1], operands[2]));
4061 else
4062 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4063 operands[1], operands[2]));
4064 DONE;
4065 })
4066
4067 (define_insn "vinsertvl_internal_<mode>"
4068 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4069 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4070 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4071 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4072 UNSPEC_INSERTL))]
4073 "TARGET_POWER10"
4074 "vins<wd>vlx %0,%1,%2"
4075 [(set_attr "type" "vecsimple")])
4076
4077 (define_expand "vinsertvr_<mode>"
4078 [(set (match_operand:VI2 0 "altivec_register_operand")
4079 (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand")
4080 (match_operand:VI2 2 "altivec_register_operand")
4081 (match_operand:SI 3 "register_operand" "r")]
4082 UNSPEC_INSERTR))]
4083 "TARGET_POWER10"
4084 {
4085 if (BYTES_BIG_ENDIAN)
4086 emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3],
4087 operands[1], operands[2]));
4088 else
4089 emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3],
4090 operands[1], operands[2]));
4091 DONE;
4092 })
4093
4094 (define_insn "vinsertvr_internal_<mode>"
4095 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4096 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4097 (match_operand:VEC_I 2 "altivec_register_operand" "v")
4098 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4099 UNSPEC_INSERTR))]
4100 "TARGET_POWER10"
4101 "vins<wd>vrx %0,%1,%2"
4102 [(set_attr "type" "vecsimple")])
4103
4104 (define_expand "vinsertgl_<mode>"
4105 [(set (match_operand:VI2 0 "altivec_register_operand")
4106 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4107 (match_operand:VI2 2 "altivec_register_operand")
4108 (match_operand:SI 3 "register_operand")]
4109 UNSPEC_INSERTL))]
4110 "TARGET_POWER10"
4111 {
4112 if (BYTES_BIG_ENDIAN)
4113 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4114 operands[1], operands[2]));
4115 else
4116 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4117 operands[1], operands[2]));
4118 DONE;
4119 })
4120
4121 (define_insn "vinsertgl_internal_<mode>"
4122 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4123 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4124 (match_operand:SI 2 "register_operand" "r")
4125 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4126 UNSPEC_INSERTL))]
4127 "TARGET_POWER10"
4128 "vins<wd>lx %0,%1,%2"
4129 [(set_attr "type" "vecsimple")])
4130
4131 (define_expand "vinsertgr_<mode>"
4132 [(set (match_operand:VI2 0 "altivec_register_operand")
4133 (unspec:VI2 [(match_operand:SI 1 "register_operand")
4134 (match_operand:VI2 2 "altivec_register_operand")
4135 (match_operand:SI 3 "register_operand")]
4136 UNSPEC_INSERTR))]
4137 "TARGET_POWER10"
4138 {
4139 if (BYTES_BIG_ENDIAN)
4140 emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3],
4141 operands[1], operands[2]));
4142 else
4143 emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3],
4144 operands[1], operands[2]));
4145 DONE;
4146 })
4147
4148 (define_insn "vinsertgr_internal_<mode>"
4149 [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v")
4150 (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r")
4151 (match_operand:SI 2 "register_operand" "r")
4152 (match_operand:VEC_I 3 "altivec_register_operand" "0")]
4153 UNSPEC_INSERTR))]
4154 "TARGET_POWER10"
4155 "vins<wd>rx %0,%1,%2"
4156 [(set_attr "type" "vecsimple")])
4157
4158 (define_expand "vreplace_elt_<mode>"
4159 [(set (match_operand:REPLACE_ELT 0 "register_operand")
4160 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
4161 (match_operand:<VEC_base> 2 "register_operand")
4162 (match_operand:QI 3 "const_0_to_3_operand")]
4163 UNSPEC_REPLACE_ELT))]
4164 "TARGET_POWER10"
4165 {
4166 int index;
4167 /* The immediate value is the element index; convert it to a byte index
4168 and adjust for endianness if needed.  */
4169 if (BYTES_BIG_ENDIAN)
4170 index = INTVAL (operands[3]) << <REPLACE_ELT_sh>;
4171
4172 else
4173 index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>);
4174
4175 emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1],
4176 operands[2],
4177 GEN_INT (index)));
4178 DONE;
4179 }
4180 [(set_attr "type" "vecsimple")])
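;; A hedged C model of the index conversion above (illustrative only;
;; <REPLACE_ELT_sh> and <REPLACE_ELT_max> come from iterator definitions
;; elsewhere in the port, where sh is log2 of the element size and max is
;; the largest valid byte index):
;;
;;   int vins_byte_index (int elt, int sh, int max, int big_endian)
;;   {
;;     int byte = elt << sh;
;;     return big_endian ? byte : max - byte;
;;   }
;;
;; For a word insert on LE, element 1 becomes byte index 12 - 4 = 8.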
4181
4182 (define_insn "vreplace_elt_<mode>_inst"
4183 [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v")
4184 (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4185 (match_operand:<VEC_base> 2 "register_operand" "r")
4186 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4187 UNSPEC_REPLACE_ELT))]
4188 "TARGET_POWER10"
4189 "vins<REPLACE_ELT_char> %0,%2,%3"
4190 [(set_attr "type" "vecsimple")])
4191
4192 (define_insn "vreplace_un_<mode>"
4193 [(set (match_operand:V16QI 0 "register_operand" "=v")
4194 (unspec:V16QI [(match_operand:REPLACE_ELT 1 "register_operand" "0")
4195 (match_operand:<VEC_base> 2 "register_operand" "r")
4196 (match_operand:QI 3 "const_0_to_12_operand" "n")]
4197 UNSPEC_REPLACE_UN))]
4198 "TARGET_POWER10"
4199 "vins<REPLACE_ELT_char> %0,%2,%3"
4200 [(set_attr "type" "vecsimple")])
4201
4202 ;; VSX_EXTRACT optimizations
4203 ;; Optimize double d = (double) vec_extract (vi, <n>)
4204 ;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP.
4205 (define_insn_and_split "*vsx_extract_si_<uns>float_df"
4206 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
4207 (any_float:DF
4208 (vec_select:SI
4209 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4210 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4211 (clobber (match_scratch:V4SI 3 "=v"))]
4212 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4213 "#"
4214 "&& 1"
4215 [(const_int 0)]
4216 {
4217 rtx dest = operands[0];
4218 rtx src = operands[1];
4219 rtx element = operands[2];
4220 rtx v4si_tmp = operands[3];
4221 int value;
4222
4223 /* Adjust the index for LE element ordering; the minuend 3 below is
4224 GET_MODE_NUNITS (V4SImode) - 1.  */
4225 if (!BYTES_BIG_ENDIAN)
4226 element = GEN_INT (3 - INTVAL (element));
4227
4228 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4229 instruction. */
4230 value = INTVAL (element);
4231 if (value != 0)
4232 {
4233 if (GET_CODE (v4si_tmp) == SCRATCH)
4234 v4si_tmp = gen_reg_rtx (V4SImode);
4235 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4236 }
4237 else
4238 v4si_tmp = src;
4239
4240 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
4241 DONE;
4242 })
4243
4244 ;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
4245 ;; where <type> is a floating point type other than double that is supported
4246 ;; by the hardware.  First convert the value to double, and then to the desired
4247 ;; type.
4248 (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
4249 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
4250 (any_float:VSX_EXTRACT_FL
4251 (vec_select:SI
4252 (match_operand:V4SI 1 "gpc_reg_operand" "v")
4253 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
4254 (clobber (match_scratch:V4SI 3 "=v"))
4255 (clobber (match_scratch:DF 4 "=wa"))]
4256 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4257 "#"
4258 "&& 1"
4259 [(const_int 0)]
4260 {
4261 rtx dest = operands[0];
4262 rtx src = operands[1];
4263 rtx element = operands[2];
4264 rtx v4si_tmp = operands[3];
4265 rtx df_tmp = operands[4];
4266 int value;
4267
4268 /* Adjust the index for LE element ordering; the minuend 3 below is
4269 GET_MODE_NUNITS (V4SImode) - 1.  */
4270 if (!BYTES_BIG_ENDIAN)
4271 element = GEN_INT (3 - INTVAL (element));
4272
4273 /* If the value is in the correct position, we can avoid doing the VSPLT<x>
4274 instruction. */
4275 value = INTVAL (element);
4276 if (value != 0)
4277 {
4278 if (GET_CODE (v4si_tmp) == SCRATCH)
4279 v4si_tmp = gen_reg_rtx (V4SImode);
4280 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
4281 }
4282 else
4283 v4si_tmp = src;
4284
4285 if (GET_CODE (df_tmp) == SCRATCH)
4286 df_tmp = gen_reg_rtx (DFmode);
4287
4288 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
4289
4290 if (<MODE>mode == SFmode)
4291 emit_insn (gen_truncdfsf2 (dest, df_tmp));
4292 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
4293 emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
4294 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
4295 && TARGET_FLOAT128_HW)
4296 emit_insn (gen_extenddftf2_hw (dest, df_tmp));
4297 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
4298 emit_insn (gen_extenddfif2 (dest, df_tmp));
4299 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
4300 emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
4301 else
4302 gcc_unreachable ();
4303
4304 DONE;
4305 })
4306
4307 ;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>)
4308 ;; where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
4309 ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
4310 ;; vector short or vector unsigned short.
4311 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_fl_<FL_CONV:mode>"
4312 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4313 (float:FL_CONV
4314 (vec_select:<VSX_EXTRACT_I:VEC_base>
4315 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4316 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4317 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4318 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4319 && TARGET_P9_VECTOR"
4320 "#"
4321 "&& reload_completed"
4322 [(parallel [(set (match_dup 3)
4323 (vec_select:<VSX_EXTRACT_I:VEC_base>
4324 (match_dup 1)
4325 (parallel [(match_dup 2)])))
4326 (clobber (scratch:SI))])
4327 (set (match_dup 4)
4328 (sign_extend:DI (match_dup 3)))
4329 (set (match_dup 0)
4330 (float:<FL_CONV:MODE> (match_dup 4)))]
4331 {
4332 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4333 }
4334 [(set_attr "isa" "<FL_CONV:VSisa>")])
4335
4336 (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_ufl_<FL_CONV:mode>"
4337 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
4338 (unsigned_float:FL_CONV
4339 (vec_select:<VSX_EXTRACT_I:VEC_base>
4340 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
4341 (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
4342 (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))]
4343 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
4344 && TARGET_P9_VECTOR"
4345 "#"
4346 "&& reload_completed"
4347 [(parallel [(set (match_dup 3)
4348 (vec_select:<VSX_EXTRACT_I:VEC_base>
4349 (match_dup 1)
4350 (parallel [(match_dup 2)])))
4351 (clobber (scratch:SI))])
4352 (set (match_dup 0)
4353 (float:<FL_CONV:MODE> (match_dup 4)))]
4354 {
4355 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
4356 }
4357 [(set_attr "isa" "<FL_CONV:VSisa>")])
4358
4359 ;; V4SI/V8HI/V16QI set operation on ISA 3.0
4360 (define_insn "vsx_set_<mode>_p9"
4361 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
4362 (unspec:VSX_EXTRACT_I
4363 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
4364 (match_operand:<VEC_base> 2 "gpc_reg_operand" "<VSX_EX>")
4365 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
4366 UNSPEC_VSX_SET))]
4367 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4368 {
4369 int ele = INTVAL (operands[3]);
4370 int nunits = GET_MODE_NUNITS (<MODE>mode);
4371
4372 if (!BYTES_BIG_ENDIAN)
4373 ele = nunits - 1 - ele;
4374
4375 operands[3] = GEN_INT (GET_MODE_SIZE (<VEC_base>mode) * ele);
4376 if (<MODE>mode == V4SImode)
4377 return "xxinsertw %x0,%x2,%3";
4378 else
4379 return "vinsert<wd> %0,%2,%3";
4380 }
4381 [(set_attr "type" "vecperm")])
4382
4383 (define_insn_and_split "vsx_set_v4sf_p9"
4384 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4385 (unspec:V4SF
4386 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4387 (match_operand:SF 2 "gpc_reg_operand" "wa")
4388 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4389 UNSPEC_VSX_SET))
4390 (clobber (match_scratch:SI 4 "=&wa"))]
4391 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4392 "#"
4393 "&& reload_completed"
4394 [(set (match_dup 5)
4395 (unspec:V4SF [(match_dup 2)]
4396 UNSPEC_VSX_CVDPSPN))
4397 (parallel [(set (match_dup 4)
4398 (vec_select:SI (match_dup 6)
4399 (parallel [(match_dup 7)])))
4400 (clobber (scratch:SI))])
4401 (set (match_dup 8)
4402 (unspec:V4SI [(match_dup 8)
4403 (match_dup 4)
4404 (match_dup 3)]
4405 UNSPEC_VSX_SET))]
4406 {
4407 unsigned int tmp_regno = reg_or_subregno (operands[4]);
4408
4409 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
4410 operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
4411 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
4412 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4413 }
4414 [(set_attr "type" "vecperm")
4415 (set_attr "length" "12")
4416 (set_attr "isa" "p9v")])
4417
4418 ;; Special case setting a V4SF element to 0.0f.
4419 (define_insn_and_split "*vsx_set_v4sf_p9_zero"
4420 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4421 (unspec:V4SF
4422 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4423 (match_operand:SF 2 "zero_fp_constant" "j")
4424 (match_operand:QI 3 "const_0_to_3_operand" "n")]
4425 UNSPEC_VSX_SET))
4426 (clobber (match_scratch:SI 4 "=&wa"))]
4427 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
4428 "#"
4429 "&& reload_completed"
4430 [(set (match_dup 4)
4431 (const_int 0))
4432 (set (match_dup 5)
4433 (unspec:V4SI [(match_dup 5)
4434 (match_dup 4)
4435 (match_dup 3)]
4436 UNSPEC_VSX_SET))]
4437 {
4438 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
4439 }
4440 [(set_attr "type" "vecperm")
4441 (set_attr "length" "8")
4442 (set_attr "isa" "p9v")])
4443
4444 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
4445 ;; that is in the default scalar position (1 for big endian, 2 for little
4446 ;; endian). We just need to do an xxinsertw since the element is in the
4447 ;; correct location.
4448
4449 (define_insn "*vsx_insert_extract_v4sf_p9"
4450 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4451 (unspec:V4SF
4452 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4453 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4454 (parallel
4455 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4456 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4457 UNSPEC_VSX_SET))]
4458 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
4459 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
4460 {
4461 int ele = INTVAL (operands[4]);
4462
4463 /* Adjust the index for LE element ordering; the minuend 3 below is
4464 GET_MODE_NUNITS (V4SFmode) - 1.  */
4465 if (!BYTES_BIG_ENDIAN)
4466 ele = 3 - ele;
4467
4468 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
4469 return "xxinsertw %x0,%x2,%4";
4470 }
4471 [(set_attr "type" "vecperm")])
4472
4473 ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
4474 ;; that is in the default scalar position (1 for big endian, 2 for little
4475 ;; endian).  Do the insert/extract as integer operations to avoid the conversion.
4476
4477 (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
4478 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
4479 (unspec:V4SF
4480 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
4481 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
4482 (parallel
4483 [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
4484 (match_operand:QI 4 "const_0_to_3_operand" "n")]
4485 UNSPEC_VSX_SET))
4486 (clobber (match_scratch:SI 5 "=&wa"))]
4487 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
4488 && TARGET_P9_VECTOR && TARGET_POWERPC64
4489 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))"
4490 "#"
4491 "&& 1"
4492 [(parallel [(set (match_dup 5)
4493 (vec_select:SI (match_dup 6)
4494 (parallel [(match_dup 3)])))
4495 (clobber (scratch:SI))])
4496 (set (match_dup 7)
4497 (unspec:V4SI [(match_dup 8)
4498 (match_dup 5)
4499 (match_dup 4)]
4500 UNSPEC_VSX_SET))]
4501 {
4502 if (GET_CODE (operands[5]) == SCRATCH)
4503 operands[5] = gen_reg_rtx (SImode);
4504
4505 operands[6] = gen_lowpart (V4SImode, operands[2]);
4506 operands[7] = gen_lowpart (V4SImode, operands[0]);
4507 operands[8] = gen_lowpart (V4SImode, operands[1]);
4508 }
4509 [(set_attr "type" "vecperm")
4510 (set_attr "isa" "p9v")])
4511
4512 ;; Expanders for builtins
4513 (define_expand "vsx_mergel_<mode>"
4514 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4515 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4516 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4517 "VECTOR_MEM_VSX_P (<MODE>mode)"
4518 {
4519 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
4520 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4521 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4522 emit_insn (gen_rtx_SET (operands[0], x));
4523 DONE;
4524 })
4525
4526 (define_expand "vsx_mergeh_<mode>"
4527 [(use (match_operand:VSX_D 0 "vsx_register_operand"))
4528 (use (match_operand:VSX_D 1 "vsx_register_operand"))
4529 (use (match_operand:VSX_D 2 "vsx_register_operand"))]
4530 "VECTOR_MEM_VSX_P (<MODE>mode)"
4531 {
4532 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
4533 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
4534 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
4535 emit_insn (gen_rtx_SET (operands[0], x));
4536 DONE;
4537 })
4538
4539 ;; V2DF/V2DI splat
4540 ;; We separate the register splat insn from the memory splat insn to force the
4541 ;; register allocator to generate the indexed form of the SPLAT when it is
4542 ;; given an offsettable memory reference. Otherwise, if the register and
4543 ;; memory insns were combined into a single insn, the register allocator would
4544 ;; load the value into a register and then do a doubleword permute.
4545 (define_expand "vsx_splat_<mode>"
4546 [(set (match_operand:VSX_D 0 "vsx_register_operand")
4547 (vec_duplicate:VSX_D
4548 (match_operand:<VEC_base> 1 "input_operand")))]
4549 "VECTOR_MEM_VSX_P (<MODE>mode)"
4550 {
4551 rtx op1 = operands[1];
4552 if (MEM_P (op1))
4553 operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
4554 else if (!REG_P (op1))
4555 operands[1] = force_reg (<VSX_D:VEC_base>mode, op1);
4556 })
4557
4558 (define_insn "vsx_splat_<mode>_reg"
4559 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
4560 (vec_duplicate:VSX_D
4561 (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")))]
4562 "VECTOR_MEM_VSX_P (<MODE>mode)"
4563 "@
4564 xxpermdi %x0,%x1,%x1,0
4565 mtvsrdd %x0,%1,%1"
4566 [(set_attr "type" "vecperm,vecmove")])
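;; A brief note (hedged): mtvsrdd %x0,%1,%1 moves the same GPR into both
;; doublewords of the VSX register, and xxpermdi with a DM field of 0
;; duplicates doubleword 0, so either alternative realizes the
;; vec_duplicate.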
4567
4568 (define_insn "vsx_splat_<mode>_mem"
4569 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4570 (vec_duplicate:VSX_D
4571 (match_operand:<VSX_D:VEC_base> 1 "memory_operand" "Z")))]
4572 "VECTOR_MEM_VSX_P (<MODE>mode)"
4573 "lxvdsx %x0,%y1"
4574 [(set_attr "type" "vecload")])
4575
4576 ;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element
4577 (define_insn "*vsx_splat_extract_<mode>"
4578 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4579 (vec_duplicate:VSX_D
4580 (vec_select:<VEC_base>
4581 (match_operand:VSX_D 1 "vsx_register_operand" "wa")
4582 (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))]
4583 "VECTOR_MEM_VSX_P (<MODE>mode)"
4584 {
4585 int which_word = INTVAL (operands[2]);
4586 if (!BYTES_BIG_ENDIAN)
4587 which_word = 1 - which_word;
4588
4589 operands[3] = GEN_INT (which_word ? 3 : 0);
4590 return "xxpermdi %x0,%x1,%x1,%3";
4591 }
4592 [(set_attr "type" "vecperm")])
4593
4594 ;; V4SI splat support
4595 (define_insn "vsx_splat_v4si"
4596 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
4597 (vec_duplicate:V4SI
4598 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
4599 "TARGET_P9_VECTOR"
4600 "@
4601 mtvsrws %x0,%1
4602 lxvwsx %x0,%y1"
4603 [(set_attr "type" "vecperm,vecload")])
4604
4605 ;; SImode is not currently allowed in vector registers.  This pattern
4606 ;; allows us to use direct move to get the value into a vector register
4607 ;; so that we can use XXSPLTW.
4608 (define_insn "vsx_splat_v4si_di"
4609 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
4610 (vec_duplicate:V4SI
4611 (truncate:SI
4612 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
4613 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
4614 "@
4615 xxspltw %x0,%x1,1
4616 mtvsrws %x0,%1"
4617 [(set_attr "type" "vecperm")
4618 (set_attr "isa" "p8v,*")])
4619
4620 ;; V4SF splat (ISA 3.0)
4621 (define_insn_and_split "vsx_splat_v4sf"
4622 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
4623 (vec_duplicate:V4SF
4624 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
4625 "TARGET_P9_VECTOR"
4626 "@
4627 lxvwsx %x0,%y1
4628 #
4629 mtvsrws %x0,%1"
4630 "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
4631 [(set (match_dup 0)
4632 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
4633 (set (match_dup 0)
4634 (unspec:V4SF [(match_dup 0)
4635 (const_int 0)] UNSPEC_VSX_XXSPLTW))]
4636 ""
4637 [(set_attr "type" "vecload,vecperm,vecperm")
4638 (set_attr "length" "*,8,*")
4639 (set_attr "isa" "*,p8v,*")])
4640
4641 ;; V4SF/V4SI splat from a vector element
4642 (define_insn "vsx_xxspltw_<mode>"
4643 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4644 (vec_duplicate:VSX_W
4645 (vec_select:<VEC_base>
4646 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4647 (parallel
4648 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
4649 "VECTOR_MEM_VSX_P (<MODE>mode)"
4650 {
4651 if (!BYTES_BIG_ENDIAN)
4652 operands[2] = GEN_INT (3 - INTVAL (operands[2]));
4653
4654 return "xxspltw %x0,%x1,%2";
4655 }
4656 [(set_attr "type" "vecperm")])
4657
4658 (define_insn "vsx_xxspltw_<mode>_direct"
4659 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4660 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
4661 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4662 UNSPEC_VSX_XXSPLTW))]
4663 "VECTOR_MEM_VSX_P (<MODE>mode)"
4664 "xxspltw %x0,%x1,%2"
4665 [(set_attr "type" "vecperm")])
4666
4667 ;; V16QI/V8HI splat support on ISA 2.07
4668 (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
4669 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
4670 (vec_duplicate:VSX_SPLAT_I
4671 (truncate:<VEC_base>
4672 (match_operand:DI 1 "altivec_register_operand" "v"))))]
4673 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
4674 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
4675 [(set_attr "type" "vecperm")])
4676
4677 ;; V2DF/V2DI splat for use by vec_splat builtin
4678 (define_insn "vsx_xxspltd_<mode>"
4679 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
4680 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
4681 (match_operand:QI 2 "u5bit_cint_operand" "i")]
4682 UNSPEC_VSX_XXSPLTD))]
4683 "VECTOR_MEM_VSX_P (<MODE>mode)"
4684 {
4685 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
4686 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
4687 return "xxpermdi %x0,%x1,%x1,0";
4688 else
4689 return "xxpermdi %x0,%x1,%x1,3";
4690 }
4691 [(set_attr "type" "vecperm")])
4692
4693 ;; V4SF/V4SI interleave
4694 (define_expand "vsx_xxmrghw_<mode>"
4695 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4696 (vec_select:VSX_W
4697 (vec_concat:<VS_double>
4698 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4699 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4700 (parallel [(const_int 0) (const_int 4)
4701 (const_int 1) (const_int 5)])))]
4702 "VECTOR_MEM_VSX_P (<MODE>mode)"
4703 {
4704 rtx (*fun) (rtx, rtx, rtx);
4705 fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_<mode>
4706 : gen_altivec_vmrglw_direct_<mode>;
4707 if (!BYTES_BIG_ENDIAN)
4708 std::swap (operands[1], operands[2]);
4709 emit_insn (fun (operands[0], operands[1], operands[2]));
4710 DONE;
4711 }
4712 [(set_attr "type" "vecperm")])
4713
4714 (define_expand "vsx_xxmrglw_<mode>"
4715 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
4716 (vec_select:VSX_W
4717 (vec_concat:<VS_double>
4718 (match_operand:VSX_W 1 "vsx_register_operand" "wa")
4719 (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
4720 (parallel [(const_int 2) (const_int 6)
4721 (const_int 3) (const_int 7)])))]
4722 "VECTOR_MEM_VSX_P (<MODE>mode)"
4723 {
4724 rtx (*fun) (rtx, rtx, rtx);
4725 fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_<mode>
4726 : gen_altivec_vmrghw_direct_<mode>;
4727 if (!BYTES_BIG_ENDIAN)
4728 std::swap (operands[1], operands[2]);
4729 emit_insn (fun (operands[0], operands[1], operands[2]));
4730 DONE;
4731 }
4732 [(set_attr "type" "vecperm")])
4733
4734 ;; Shift left double by word immediate
4735 (define_insn "vsx_xxsldwi_<mode>"
4736 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
4737 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
4738 (match_operand:VSX_L 2 "vsx_register_operand" "wa")
4739 (match_operand:QI 3 "u5bit_cint_operand" "i")]
4740 UNSPEC_VSX_SLDWI))]
4741 "VECTOR_MEM_VSX_P (<MODE>mode)"
4742 "xxsldwi %x0,%x1,%x2,%3"
4743 [(set_attr "type" "vecperm")
4744 (set_attr "isa" "<VSisa>")])
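;; As a hedged illustration of xxsldwi: the instruction concatenates its two
;; inputs and extracts four contiguous words starting at word %3, counted in
;; big endian word order.  With A = {a0,a1,a2,a3} and B = {b0,b1,b2,b3},
;; xxsldwi A,B,2 yields {a2,a3,b0,b1}; a shift of 0 simply copies A.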
4745
4746 \f
4747 ;; Vector reduction insns and splitters
4748
4749 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
4750 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
4751 (VEC_reduc:V2DF
4752 (vec_concat:V2DF
4753 (vec_select:DF
4754 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4755 (parallel [(const_int 1)]))
4756 (vec_select:DF
4757 (match_dup 1)
4758 (parallel [(const_int 0)])))
4759 (match_dup 1)))
4760 (clobber (match_scratch:V2DF 2 "=0,&wa"))]
4761 "VECTOR_UNIT_VSX_P (V2DFmode)"
4762 "#"
4763 "&& 1"
4764 [(const_int 0)]
4765 {
4766 rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
4767 ? gen_reg_rtx (V2DFmode)
4768 : operands[2];
4769 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
4770 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
4771 DONE;
4772 }
4773 [(set_attr "length" "8")
4774 (set_attr "type" "veccomplex")])
4775
4776 (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
4777 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
4778 (VEC_reduc:V4SF
4779 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4780 (match_operand:V4SF 1 "vfloat_operand" "wa")))
4781 (clobber (match_scratch:V4SF 2 "=&wa"))
4782 (clobber (match_scratch:V4SF 3 "=&wa"))]
4783 "VECTOR_UNIT_VSX_P (V4SFmode)"
4784 "#"
4785 "&& 1"
4786 [(const_int 0)]
4787 {
4788 rtx op0 = operands[0];
4789 rtx op1 = operands[1];
4790 rtx tmp2, tmp3, tmp4;
4791
4792 if (can_create_pseudo_p ())
4793 {
4794 tmp2 = gen_reg_rtx (V4SFmode);
4795 tmp3 = gen_reg_rtx (V4SFmode);
4796 tmp4 = gen_reg_rtx (V4SFmode);
4797 }
4798 else
4799 {
4800 tmp2 = operands[2];
4801 tmp3 = operands[3];
4802 tmp4 = tmp2;
4803 }
4804
4805 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4806 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4807 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4808 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
4809 DONE;
4810 }
4811 [(set_attr "length" "16")
4812 (set_attr "type" "veccomplex")])
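;; A hedged trace of the V4SF reduction above, written for the add case with
;; v = {v0,v1,v2,v3} (t2..t4 mirror tmp2..tmp4 in the split):
;;
;;   t2 = xxsldwi (v, v, 2)     ;; {v2, v3, v0, v1}
;;   t3 = t2 + v                ;; {v0+v2, v1+v3, v0+v2, v1+v3}
;;   t4 = xxsldwi (t3, t3, 3)   ;; rotate by one more word
;;   r  = t4 + t3               ;; every lane now holds v0+v1+v2+v3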
4813
4814 ;; Combiner patterns that pair with the vector reduction patterns, knowing
4815 ;; we can get to the top element of the V2DF array without doing an extract.
4816
4817 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
4818 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
4819 (vec_select:DF
4820 (VEC_reduc:V2DF
4821 (vec_concat:V2DF
4822 (vec_select:DF
4823 (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
4824 (parallel [(const_int 1)]))
4825 (vec_select:DF
4826 (match_dup 1)
4827 (parallel [(const_int 0)])))
4828 (match_dup 1))
4829 (parallel [(const_int 1)])))
4830 (clobber (match_scratch:DF 2 "=0,&wa"))]
4831 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
4832 "#"
4833 "&& 1"
4834 [(const_int 0)]
4835 {
4836 rtx hi = gen_highpart (DFmode, operands[1]);
4837 rtx lo = (GET_CODE (operands[2]) == SCRATCH)
4838 ? gen_reg_rtx (DFmode)
4839 : operands[2];
4840
4841 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
4842 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
4843 DONE;
4844 }
4845 [(set_attr "length" "8")
4846 (set_attr "type" "veccomplex")])
4847
4848 (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
4849 [(set (match_operand:SF 0 "vfloat_operand" "=f")
4850 (vec_select:SF
4851 (VEC_reduc:V4SF
4852 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
4853 (match_operand:V4SF 1 "vfloat_operand" "wa"))
4854 (parallel [(const_int 3)])))
4855 (clobber (match_scratch:V4SF 2 "=&wa"))
4856 (clobber (match_scratch:V4SF 3 "=&wa"))
4857 (clobber (match_scratch:V4SF 4 "=0"))]
4858 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
4859 "#"
4860 "&& 1"
4861 [(const_int 0)]
4862 {
4863 rtx op0 = operands[0];
4864 rtx op1 = operands[1];
4865 rtx tmp2, tmp3, tmp4, tmp5;
4866
4867 if (can_create_pseudo_p ())
4868 {
4869 tmp2 = gen_reg_rtx (V4SFmode);
4870 tmp3 = gen_reg_rtx (V4SFmode);
4871 tmp4 = gen_reg_rtx (V4SFmode);
4872 tmp5 = gen_reg_rtx (V4SFmode);
4873 }
4874 else
4875 {
4876 tmp2 = operands[2];
4877 tmp3 = operands[3];
4878 tmp4 = tmp2;
4879 tmp5 = operands[4];
4880 }
4881
4882 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
4883 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
4884 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
4885 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
4886 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
4887 DONE;
4888 }
4889 [(set_attr "length" "20")
4890 (set_attr "type" "veccomplex")])
4891
4892 \f
4893 ;; Power8 Vector fusion. The fused ops must be physically adjacent.
4894 (define_peephole
4895 [(set (match_operand:P 0 "base_reg_operand")
4896 (match_operand:P 1 "short_cint_operand"))
4897 (set (match_operand:VSX_M 2 "vsx_register_operand")
4898 (mem:VSX_M (plus:P (match_dup 0)
4899 (match_operand:P 3 "int_reg_operand"))))]
4900 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4901 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4902 [(set_attr "length" "8")
4903 (set_attr "type" "vecload")])
4904
4905 (define_peephole
4906 [(set (match_operand:P 0 "base_reg_operand")
4907 (match_operand:P 1 "short_cint_operand"))
4908 (set (match_operand:VSX_M 2 "vsx_register_operand")
4909 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
4910 (match_dup 0))))]
4911 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
4912 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
4913 [(set_attr "length" "8")
4914 (set_attr "type" "vecload")])
4915
4916 \f
4917 ;; ISA 3.1 vector extend sign support
4918 (define_insn "vsx_sign_extend_v2di_v1ti"
4919 [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
4920 (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")]
4921 UNSPEC_VSX_SIGN_EXTEND))]
4922 "TARGET_POWER10"
4923 "vextsd2q %0,%1"
4924 [(set_attr "type" "vecexts")])
4925
4926 ;; ISA 3.0 vector extend sign support
4927
4928 (define_insn "vsx_sign_extend_v16qi_<mode>"
4929 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4930 (unspec:VSINT_84
4931 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
4932 UNSPEC_VSX_SIGN_EXTEND))]
4933 "TARGET_P9_VECTOR"
4934 "vextsb2<wd> %0,%1"
4935 [(set_attr "type" "vecexts")])
4936
4937 (define_insn "vsx_sign_extend_v8hi_<mode>"
4938 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
4939 (unspec:VSINT_84
4940 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
4941 UNSPEC_VSX_SIGN_EXTEND))]
4942 "TARGET_P9_VECTOR"
4943 "vextsh2<wd> %0,%1"
4944 [(set_attr "type" "vecexts")])
4945
4946 (define_insn "vsx_sign_extend_v4si_v2di"
4947 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
4948 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
4949 UNSPEC_VSX_SIGN_EXTEND))]
4950 "TARGET_P9_VECTOR"
4951 "vextsw2d %0,%1"
4952 [(set_attr "type" "vecexts")])
4953
4954 ;; Sign extend DI to TI.  We provide both GPR targets and Altivec targets on
4955 ;; power10.  On earlier systems, the machine independent code will sign extend
4956 ;; the 64-bit value to 128 bits using shift instructions.
4957 ;;
4958 ;; If the register allocator prefers to use GPR registers, copy the low double
4959 ;; word and sign extend it with an arithmetic shift right by 63 bits.
4960 ;;
4961 ;; If the register allocator prefers to use Altivec registers on power10,
4962 ;; generate the vextsd2q instruction.
4963 (define_insn_and_split "extendditi2"
4964 [(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v")
4965 (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b,wa,Z")))
4966 (clobber (reg:DI CA_REGNO))]
4967 "TARGET_POWERPC64 && TARGET_POWER10"
4968 "#"
4969 "&& reload_completed"
4970 [(pc)]
4971 {
4972 rtx dest = operands[0];
4973 rtx src = operands[1];
4974 int dest_regno = reg_or_subregno (dest);
4975
4976 /* Handle conversion to GPR registers. Load up the low part and then do
4977 a sign extension to the upper part. */
4978 if (INT_REGNO_P (dest_regno))
4979 {
4980 rtx dest_hi = gen_highpart (DImode, dest);
4981 rtx dest_lo = gen_lowpart (DImode, dest);
4982
4983 emit_move_insn (dest_lo, src);
4984 /* In case src is a MEM, we have to use the destination, which is a
4985 register, instead of re-using the source. */
4986 rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo;
4987 emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63)));
4988 DONE;
4989 }
4990
4991   /* For conversion to an Altivec register, generate either a splat operation
4992      or a load rightmost double word instruction.  Both instructions get the
4993      DImode value into the lower 64 bits, and then we do the vextsd2q
4994      instruction.  */
4995
4996 else if (ALTIVEC_REGNO_P (dest_regno))
4997 {
4998 if (MEM_P (src))
4999 emit_insn (gen_vsx_lxvrdx (dest, src));
5000 else
5001 {
5002 rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
5003 emit_insn (gen_vsx_splat_v2di (dest_v2di, src));
5004 }
5005
5006 emit_insn (gen_extendditi2_vector (dest, dest));
5007 DONE;
5008 }
5009
5010 else
5011 gcc_unreachable ();
5012 }
5013 [(set_attr "length" "8")
5014 (set_attr "type" "shift,load,vecmove,vecperm,load")])
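;; For illustration, an ordinary sign extension in C is enough to reach
;; extendditi2 on power10 (a sketch, assuming __int128 support):
;;
;;   __int128 sext (long long x)
;;   {
;;     return x;   /* GPRs: copy low + sradi 63; VSX: splat/lxvrdx + vextsd2q */
;;   }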
5015
5016 ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg
5017 (define_insn "extendditi2_vector"
5018 [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
5019 (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")]
5020 UNSPEC_EXTENDDITI2))]
5021 "TARGET_POWER10"
5022 "vextsd2q %0,%1"
5023 [(set_attr "type" "vecexts")])
5024
5025 \f
5026 ;; ISA 3.0 Binary Floating-Point Support
5027
5028 ;; VSX Scalar Extract Exponent Quad-Precision
5029 (define_insn "xsxexpqp_<mode>"
5030 [(set (match_operand:DI 0 "altivec_register_operand" "=v")
5031 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
5032 UNSPEC_VSX_SXEXPDP))]
5033 "TARGET_P9_VECTOR"
5034 "xsxexpqp %0,%1"
5035 [(set_attr "type" "vecmove")])
5036
5037 ;; VSX Scalar Extract Exponent Double-Precision
5038 (define_insn "xsxexpdp"
5039 [(set (match_operand:DI 0 "register_operand" "=r")
5040 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
5041 UNSPEC_VSX_SXEXPDP))]
5042 "TARGET_P9_VECTOR && TARGET_64BIT"
5043 "xsxexpdp %0,%x1"
5044 [(set_attr "type" "integer")])
5045
5046 ;; VSX Scalar Extract Significand Quad-Precision
5047 (define_insn "xsxsigqp_<mode>"
5048 [(set (match_operand:TI 0 "altivec_register_operand" "=v")
5049 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
5050 UNSPEC_VSX_SXSIG))]
5051 "TARGET_P9_VECTOR"
5052 "xsxsigqp %0,%1"
5053 [(set_attr "type" "vecmove")])
5054
5055 ;; VSX Scalar Extract Significand Double-Precision
5056 (define_insn "xsxsigdp"
5057 [(set (match_operand:DI 0 "register_operand" "=r")
5058 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
5059 UNSPEC_VSX_SXSIG))]
5060 "TARGET_P9_VECTOR && TARGET_64BIT"
5061 "xsxsigdp %0,%x1"
5062 [(set_attr "type" "integer")])
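;; For example, xsxexpdp/xsxsigdp back the documented ISA 3.0 built-ins
;; (a hedged sketch; assumes -mcpu=power9 and a 64-bit target):
;;
;;   #include <altivec.h>
;;
;;   unsigned long long exp_bits (double d)
;;   {
;;     return scalar_extract_exp (d);   /* biased exponent, via xsxexpdp */
;;   }
;;
;;   unsigned long long sig_bits (double d)
;;   {
;;     return scalar_extract_sig (d);   /* significand, via xsxsigdp */
;;   }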
5063
5064 ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
5065 (define_insn "xsiexpqpf_<mode>"
5066 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5067 (unspec:IEEE128
5068 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5069 (match_operand:DI 2 "altivec_register_operand" "v")]
5070 UNSPEC_VSX_SIEXPQP))]
5071 "TARGET_P9_VECTOR"
5072 "xsiexpqp %0,%1,%2"
5073 [(set_attr "type" "vecmove")])
5074
5075 ;; VSX Scalar Insert Exponent Quad-Precision
5076 (define_insn "xsiexpqp_<mode>"
5077 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
5078 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
5079 (match_operand:DI 2 "altivec_register_operand" "v")]
5080 UNSPEC_VSX_SIEXPQP))]
5081 "TARGET_P9_VECTOR"
5082 "xsiexpqp %0,%1,%2"
5083 [(set_attr "type" "vecmove")])
5084
5085 ;; VSX Scalar Insert Exponent Double-Precision
5086 (define_insn "xsiexpdp"
5087 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5088 (unspec:DF [(match_operand:DI 1 "register_operand" "r")
5089 (match_operand:DI 2 "register_operand" "r")]
5090 UNSPEC_VSX_SIEXPDP))]
5091 "TARGET_P9_VECTOR && TARGET_64BIT"
5092 "xsiexpdp %x0,%1,%2"
5093 [(set_attr "type" "fpsimple")])
5094
5095 ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
5096 (define_insn "xsiexpdpf"
5097 [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
5098 (unspec:DF [(match_operand:DF 1 "register_operand" "r")
5099 (match_operand:DI 2 "register_operand" "r")]
5100 UNSPEC_VSX_SIEXPDP))]
5101 "TARGET_P9_VECTOR && TARGET_64BIT"
5102 "xsiexpdp %x0,%1,%2"
5103 [(set_attr "type" "fpsimple")])
5104
5105 ;; VSX Scalar Compare Exponents Double-Precision
5106 (define_expand "xscmpexpdp_<code>"
5107 [(set (match_dup 3)
5108 (compare:CCFP
5109 (unspec:DF
5110 [(match_operand:DF 1 "vsx_register_operand" "wa")
5111 (match_operand:DF 2 "vsx_register_operand" "wa")]
5112 UNSPEC_VSX_SCMPEXPDP)
5113 (const_int 0)))
5114 (set (match_operand:SI 0 "register_operand" "=r")
5115 (CMP_TEST:SI (match_dup 3)
5116 (const_int 0)))]
5117 "TARGET_P9_VECTOR"
5118 {
5119 if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
5120 {
5121 emit_move_insn (operands[0], const0_rtx);
5122 DONE;
5123 }
5124
5125 operands[3] = gen_reg_rtx (CCFPmode);
5126 })
5127
5128 (define_insn "*xscmpexpdp"
5129 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5130 (compare:CCFP
5131 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
5132 (match_operand:DF 2 "vsx_register_operand" "wa")]
5133 UNSPEC_VSX_SCMPEXPDP)
5134 (match_operand:SI 3 "zero_constant" "j")))]
5135 "TARGET_P9_VECTOR"
5136 "xscmpexpdp %0,%x1,%x2"
5137 [(set_attr "type" "fpcompare")])
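;; The expander above serves built-ins such as scalar_cmp_exp_eq (a sketch
;; of the documented ISA 3.0 usage; assumes -mcpu=power9):
;;
;;   #include <altivec.h>
;;
;;   int same_exponent (double a, double b)
;;   {
;;     return scalar_cmp_exp_eq (a, b);   /* xscmpexpdp + CR bit test */
;;   }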
5138
5139 ;; VSX Scalar Compare Exponents Quad-Precision
5140 (define_expand "xscmpexpqp_<code>_<mode>"
5141 [(set (match_dup 3)
5142 (compare:CCFP
5143 (unspec:IEEE128
5144 [(match_operand:IEEE128 1 "vsx_register_operand" "v")
5145 (match_operand:IEEE128 2 "vsx_register_operand" "v")]
5146 UNSPEC_VSX_SCMPEXPQP)
5147 (const_int 0)))
5148 (set (match_operand:SI 0 "register_operand" "=r")
5149 (CMP_TEST:SI (match_dup 3)
5150 (const_int 0)))]
5151 "TARGET_P9_VECTOR"
5152 {
5153 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
5154 {
5155 emit_move_insn (operands[0], const0_rtx);
5156 DONE;
5157 }
5158
5159 operands[3] = gen_reg_rtx (CCFPmode);
5160 })
5161
5162 (define_insn "*xscmpexpqp"
5163 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
5164 (compare:CCFP
5165 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5166 (match_operand:IEEE128 2 "altivec_register_operand" "v")]
5167 UNSPEC_VSX_SCMPEXPQP)
5168 (match_operand:SI 3 "zero_constant" "j")))]
5169 "TARGET_P9_VECTOR"
5170 "xscmpexpqp %0,%1,%2"
5171 [(set_attr "type" "fpcompare")])
5172
5173 ;; VSX Scalar Test Data Class Quad-Precision
5174 ;; (Expansion for scalar_test_data_class (__ieee128, int))
5175 ;; (Has side effect of setting the lt bit if operand 1 is negative,
5176 ;; setting the eq bit if any of the conditions tested by operand 2
5177 ;; are satisfied, and clearing the gt and unordered bits to zero.)
5178 (define_expand "xststdcqp_<mode>"
5179 [(set (match_dup 3)
5180 (compare:CCFP
5181 (unspec:IEEE128
5182 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5183 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5184 UNSPEC_VSX_STSTDC)
5185 (const_int 0)))
5186 (set (match_operand:SI 0 "register_operand" "=r")
5187 (eq:SI (match_dup 3)
5188 (const_int 0)))]
5189 "TARGET_P9_VECTOR"
5190 {
5191 operands[3] = gen_reg_rtx (CCFPmode);
5192 })
5193
5194 ;; VSX Scalar Test Data Class Double- and Single-Precision
5195 ;; (The lt bit is set if operand 1 is negative. The eq bit is set
5196 ;; if any of the conditions tested by operand 2 are satisfied.
5197 ;; The gt and unordered bits are cleared to zero.)
5198 (define_expand "xststdc<sd>p"
5199 [(set (match_dup 3)
5200 (compare:CCFP
5201 (unspec:SFDF
5202 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5203 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5204 UNSPEC_VSX_STSTDC)
5205 (match_dup 4)))
5206 (set (match_operand:SI 0 "register_operand" "=r")
5207 (eq:SI (match_dup 3)
5208 (const_int 0)))]
5209 "TARGET_P9_VECTOR"
5210 {
5211 operands[3] = gen_reg_rtx (CCFPmode);
5212 operands[4] = CONST0_RTX (SImode);
5213 })
5214
5215 ;; VSX Scalar Test Negative Quad-Precision
5216 (define_expand "xststdcnegqp_<mode>"
5217 [(set (match_dup 2)
5218 (compare:CCFP
5219 (unspec:IEEE128
5220 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5221 (const_int 0)]
5222 UNSPEC_VSX_STSTDC)
5223 (const_int 0)))
5224 (set (match_operand:SI 0 "register_operand" "=r")
5225 (lt:SI (match_dup 2)
5226 (const_int 0)))]
5227 "TARGET_P9_VECTOR"
5228 {
5229 operands[2] = gen_reg_rtx (CCFPmode);
5230 })
5231
5232 ;; VSX Scalar Test Negative Double- and Single-Precision
5233 (define_expand "xststdcneg<sd>p"
5234 [(set (match_dup 2)
5235 (compare:CCFP
5236 (unspec:SFDF
5237 [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5238 (const_int 0)]
5239 UNSPEC_VSX_STSTDC)
5240 (match_dup 3)))
5241 (set (match_operand:SI 0 "register_operand" "=r")
5242 (lt:SI (match_dup 2)
5243 (const_int 0)))]
5244 "TARGET_P9_VECTOR"
5245 {
5246 operands[2] = gen_reg_rtx (CCFPmode);
5247 operands[3] = CONST0_RTX (SImode);
5248 })
5249
5250 (define_insn "*xststdcqp_<mode>"
5251 [(set (match_operand:CCFP 0 "" "=y")
5252 (compare:CCFP
5253 (unspec:IEEE128
5254 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
5255 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5256 UNSPEC_VSX_STSTDC)
5257 (const_int 0)))]
5258 "TARGET_P9_VECTOR"
5259 "xststdcqp %0,%1,%2"
5260 [(set_attr "type" "fpcompare")])
5261
5262 (define_insn "*xststdc<sd>p"
5263 [(set (match_operand:CCFP 0 "" "=y")
5264 (compare:CCFP
5265 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
5266 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5267 UNSPEC_VSX_STSTDC)
5268 (match_operand:SI 3 "zero_constant" "j")))]
5269 "TARGET_P9_VECTOR"
5270 "xststdc<sd>p %0,%x1,%2"
5271 [(set_attr "type" "fpcompare")])
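;; For example, a NaN-or-infinity test with the documented built-in (a
;; sketch; the 0x70 mask is this example's choice: NaN | +inf | -inf):
;;
;;   #include <altivec.h>
;;
;;   int is_nan_or_inf (double d)
;;   {
;;     return scalar_test_data_class (d, 0x70);   /* xststdcdp, eq bit */
;;   }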
5272
5273 ;; VSX Vector Extract Exponent Double and Single Precision
5274 (define_insn "xvxexp<sd>p"
5275 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5276 (unspec:VSX_F
5277 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5278 UNSPEC_VSX_VXEXP))]
5279 "TARGET_P9_VECTOR"
5280 "xvxexp<sd>p %x0,%x1"
5281 [(set_attr "type" "vecsimple")])
5282
5283 ;; VSX Vector Extract Significand Double and Single Precision
5284 (define_insn "xvxsig<sd>p"
5285 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5286 (unspec:VSX_F
5287 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
5288 UNSPEC_VSX_VXSIG))]
5289 "TARGET_P9_VECTOR"
5290 "xvxsig<sd>p %x0,%x1"
5291 [(set_attr "type" "vecsimple")])
5292
5293 ;; VSX Vector Insert Exponent Double and Single Precision
5294 (define_insn "xviexp<sd>p"
5295 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
5296 (unspec:VSX_F
5297 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5298 (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
5299 UNSPEC_VSX_VIEXP))]
5300 "TARGET_P9_VECTOR"
5301 "xviexp<sd>p %x0,%x1,%x2"
5302 [(set_attr "type" "vecsimple")])
5303
5304 ;; VSX Vector Test Data Class Double and Single Precision
5305 ;; The corresponding elements of the result vector are all ones
5306 ;; if any of the conditions tested by operand 3 are satisfied.
5307 (define_insn "xvtstdc<sd>p"
5308 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
5309 (unspec:<VSI>
5310 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
5311 (match_operand:SI 2 "u7bit_cint_operand" "n")]
5312 UNSPEC_VSX_VTSTDC))]
5313 "TARGET_P9_VECTOR"
5314 "xvtstdc<sd>p %x0,%x1,%2"
5315 [(set_attr "type" "vecsimple")])
5316
5317 ;; ISA 3.0 String Operations Support
5318
5319 ;; Compare vectors producing a vector result and a predicate, setting CR6
5320 ;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
5321 ;; v4si modes.  It does not match v2df, v4sf, or v2di modes; there is no
5322 ;; need to match those modes because they are expanded to use Power8
5323 ;; instructions.
5324 (define_insn "*vsx_ne_<mode>_p"
5325 [(set (reg:CC CR6_REGNO)
5326 (unspec:CC
5327 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
5328 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
5329 UNSPEC_PREDICATE))
5330 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
5331 (ne:VSX_EXTRACT_I (match_dup 1)
5332 (match_dup 2)))]
5333 "TARGET_P9_VECTOR"
5334 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5335 [(set_attr "type" "vecsimple")])
5336
5337 (define_insn "*vector_nez_<mode>_p"
5338 [(set (reg:CC CR6_REGNO)
5339 (unspec:CC [(unspec:VI
5340 [(match_operand:VI 1 "gpc_reg_operand" "v")
5341 (match_operand:VI 2 "gpc_reg_operand" "v")]
5342 UNSPEC_NEZ_P)]
5343 UNSPEC_PREDICATE))
5344 (set (match_operand:VI 0 "gpc_reg_operand" "=v")
5345 (unspec:VI [(match_dup 1)
5346 (match_dup 2)]
5347 UNSPEC_NEZ_P))]
5348 "TARGET_P9_VECTOR"
5349 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
5350 [(set_attr "type" "vecsimple")])
5351
5352 ;; Return first position of match between vectors using natural order
5353 ;; for both LE and BE execution modes.
5354 (define_expand "first_match_index_<mode>"
5355 [(match_operand:SI 0 "register_operand")
5356 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5357 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5358 UNSPEC_VSX_FIRST_MATCH_INDEX)]
5359 "TARGET_P9_VECTOR"
5360 {
5361 int sh;
5362
5363 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5364 rtx not_result = gen_reg_rtx (<MODE>mode);
5365
5366 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5367 operands[2]));
5368 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));
5369
5370 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5371
5372 if (<MODE>mode == V16QImode)
5373 {
5374 if (!BYTES_BIG_ENDIAN)
5375 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
5376 else
5377 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
5378 }
5379 else
5380 {
5381 rtx tmp = gen_reg_rtx (SImode);
5382 if (!BYTES_BIG_ENDIAN)
5383 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
5384 else
5385 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
5386 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5387 }
5388 DONE;
5389 })
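;; This expander implements the vec_first_match_index built-in, e.g.
;; (a sketch, assuming the documented ISA 3.0 built-in):
;;
;;   #include <altivec.h>
;;
;;   unsigned int first_match (vector unsigned char a, vector unsigned char b)
;;   {
;;     return vec_first_match_index (a, b);   /* vcmpneb + v{ct,cl}zlsbb */
;;   }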
5390
5391 ;; Return first position of match between vectors or end of string (EOS) using
5392 ;; natural element order for both LE and BE execution modes.
5393 (define_expand "first_match_or_eos_index_<mode>"
5394 [(match_operand:SI 0 "register_operand")
5395    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5396 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5397 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
5398 "TARGET_P9_VECTOR"
5399 {
5400 int sh;
5401 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5402 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5403 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5404 rtx and_result = gen_reg_rtx (<MODE>mode);
5405 rtx result = gen_reg_rtx (<MODE>mode);
5406 rtx vzero = gen_reg_rtx (<MODE>mode);
5407
5408 /* Vector with zeros in elements that correspond to zeros in operands. */
5409 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5410 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5411 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5412 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5413
5414   /* Vector with ones in elements that do not match.  */
5415 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5416 operands[2]));
5417
5418   /* Create vector with ones in elements where there was a zero in one of
5419      the source elements or where the elements match.  */
5420 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
5421 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5422
5423 if (<MODE>mode == V16QImode)
5424 {
5425 if (!BYTES_BIG_ENDIAN)
5426 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5427 else
5428 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5429 }
5430 else
5431 {
5432 rtx tmp = gen_reg_rtx (SImode);
5433 if (!BYTES_BIG_ENDIAN)
5434 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5435 else
5436 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5437 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5438 }
5439 DONE;
5440 })
5441
5442 ;; Return first position of mismatch between vectors using natural
5443 ;; element order for both LE and BE execution modes.
5444 (define_expand "first_mismatch_index_<mode>"
5445 [(match_operand:SI 0 "register_operand")
5446    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5447 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5448 UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
5449 "TARGET_P9_VECTOR"
5450 {
5451 int sh;
5452 rtx cmp_result = gen_reg_rtx (<MODE>mode);
5453
5454 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
5455 operands[2]));
5456 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5457
5458 if (<MODE>mode == V16QImode)
5459 {
5460 if (!BYTES_BIG_ENDIAN)
5461 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
5462 else
5463 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
5464 }
5465 else
5466 {
5467 rtx tmp = gen_reg_rtx (SImode);
5468 if (!BYTES_BIG_ENDIAN)
5469 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
5470 else
5471 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
5472 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5473 }
5474 DONE;
5475 })
5476
5477 ;; Return first position of mismatch between vectors or end of string (EOS)
5478 ;; using natural element order for both LE and BE execution modes.
5479 (define_expand "first_mismatch_or_eos_index_<mode>"
5480 [(match_operand:SI 0 "register_operand")
5481    (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
5482 (match_operand:VSX_EXTRACT_I 2 "register_operand")]
5483 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
5484 "TARGET_P9_VECTOR"
5485 {
5486 int sh;
5487 rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
5488 rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
5489 rtx cmpz_result = gen_reg_rtx (<MODE>mode);
5490 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
5491 rtx and_result = gen_reg_rtx (<MODE>mode);
5492 rtx result = gen_reg_rtx (<MODE>mode);
5493 rtx vzero = gen_reg_rtx (<MODE>mode);
5494
5495 /* Vector with zeros in elements that correspond to zeros in operands. */
5496 emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
5497
5498 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
5499 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
5500 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));
5501
5502   /* Vector with ones in elements that match.  */
5503 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
5504 operands[2]));
5505 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));
5506
5507   /* Create vector with ones in elements where there was a zero in one of
5508      the source elements or where the elements did not match.  */
5509 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
5510 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;
5511
5512 if (<MODE>mode == V16QImode)
5513 {
5514 if (!BYTES_BIG_ENDIAN)
5515 emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
5516 else
5517 emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
5518 }
5519 else
5520 {
5521 rtx tmp = gen_reg_rtx (SImode);
5522 if (!BYTES_BIG_ENDIAN)
5523 emit_insn (gen_vctzlsbb_<mode> (tmp, result));
5524 else
5525 emit_insn (gen_vclzlsbb_<mode> (tmp, result));
5526 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
5527 }
5528 DONE;
5529 })
5530
5531 ;; Load VSX Vector with Length
5532 (define_expand "lxvl"
5533 [(set (match_dup 3)
5534 (ashift:DI (match_operand:DI 2 "register_operand")
5535 (const_int 56)))
5536 (set (match_operand:V16QI 0 "vsx_register_operand")
5537 (unspec:V16QI
5538 [(match_operand:DI 1 "gpc_reg_operand")
5539 (mem:V16QI (match_dup 1))
5540 (match_dup 3)]
5541 UNSPEC_LXVL))]
5542 "TARGET_P9_VECTOR && TARGET_64BIT"
5543 {
5544 operands[3] = gen_reg_rtx (DImode);
5545 })
5546
5547 (define_insn "*lxvl"
5548 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5549 (unspec:V16QI
5550 [(match_operand:DI 1 "gpc_reg_operand" "b")
5551 (mem:V16QI (match_dup 1))
5552 (match_operand:DI 2 "register_operand" "r")]
5553 UNSPEC_LXVL))]
5554 "TARGET_P9_VECTOR && TARGET_64BIT"
5555 "lxvl %x0,%1,%2"
5556 [(set_attr "type" "vecload")])
5557
5558 (define_insn "lxvll"
5559 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5560 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
5561 (mem:V16QI (match_dup 1))
5562 (match_operand:DI 2 "register_operand" "r")]
5563 UNSPEC_LXVLL))]
5564 "TARGET_P9_VECTOR"
5565 "lxvll %x0,%1,%2"
5566 [(set_attr "type" "vecload")])
5567
5568 ;; Expand for builtin xl_len_r
5569 (define_expand "xl_len_r"
5570 [(match_operand:V16QI 0 "vsx_register_operand")
5571 (match_operand:DI 1 "register_operand")
5572 (match_operand:DI 2 "register_operand")]
5573 ""
5574 {
5575 rtx shift_mask = gen_reg_rtx (V16QImode);
5576 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5577 rtx tmp = gen_reg_rtx (DImode);
5578
5579 emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2]));
5580 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5581 emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
5582 emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
5583 shift_mask));
5584 DONE;
5585 })
5586
5587 (define_insn "stxvll"
5588 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5589 (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5590 (mem:V16QI (match_dup 1))
5591 (match_operand:DI 2 "register_operand" "r")]
5592 UNSPEC_STXVLL))]
5593 "TARGET_P9_VECTOR"
5594 "stxvll %x0,%1,%2"
5595 [(set_attr "type" "vecstore")])
5596
5597 ;; Store VSX Vector with Length
5598 (define_expand "stxvl"
5599 [(set (match_dup 3)
5600 (ashift:DI (match_operand:DI 2 "register_operand")
5601 (const_int 56)))
5602 (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
5603 (unspec:V16QI
5604 [(match_operand:V16QI 0 "vsx_register_operand")
5605 (mem:V16QI (match_dup 1))
5606 (match_dup 3)]
5607 UNSPEC_STXVL))]
5608 "TARGET_P9_VECTOR && TARGET_64BIT"
5609 {
5610 operands[3] = gen_reg_rtx (DImode);
5611 })
5612
5613 ;; Define the len_load/len_store optabs so the vectorizer can exploit vector
;; accesses with length.
5614 (define_expand "len_load_v16qi"
5615 [(match_operand:V16QI 0 "vlogical_operand")
5616 (match_operand:V16QI 1 "memory_operand")
5617 (match_operand:QI 2 "gpc_reg_operand")
5618 (match_operand:QI 3 "zero_constant")]
5619 "TARGET_P9_VECTOR && TARGET_64BIT"
5620 {
5621 rtx mem = XEXP (operands[1], 0);
5622 mem = force_reg (DImode, mem);
5623 rtx len = gen_lowpart (DImode, operands[2]);
5624 emit_insn (gen_lxvl (operands[0], mem, len));
5625 DONE;
5626 })
5627
5628 (define_expand "len_store_v16qi"
5629 [(match_operand:V16QI 0 "memory_operand")
5630 (match_operand:V16QI 1 "vlogical_operand")
5631 (match_operand:QI 2 "gpc_reg_operand")
5632    (match_operand:QI 3 "zero_constant")]
5634 "TARGET_P9_VECTOR && TARGET_64BIT"
5635 {
5636 rtx mem = XEXP (operands[0], 0);
5637 mem = force_reg (DImode, mem);
5638 rtx len = gen_lowpart (DImode, operands[2]);
5639 emit_insn (gen_stxvl (operands[1], mem, len));
5640 DONE;
5641 })
5642
5643 (define_insn "*stxvl"
5644 [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
5645 (unspec:V16QI
5646 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
5647 (mem:V16QI (match_dup 1))
5648 (match_operand:DI 2 "register_operand" "r")]
5649 UNSPEC_STXVL))]
5650 "TARGET_P9_VECTOR && TARGET_64BIT"
5651 "stxvl %x0,%1,%2"
5652 [(set_attr "type" "vecstore")])
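;; lxvl/stxvl back the variable-length load/store built-ins, e.g. (a sketch,
;; assuming the documented vec_xl_len/vec_xst_len; 64-bit only, n at most 16;
;; the shift by 56 in the expanders above moves n into bits 0:7):
;;
;;   #include <altivec.h>
;;   #include <stddef.h>
;;
;;   void copy_n (unsigned char *dst, unsigned char *src, size_t n)
;;   {
;;     vector unsigned char v = vec_xl_len (src, n);
;;     vec_xst_len (v, dst, n);
;;   }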
5653
5654 ;; Expand for builtin xst_len_r
5655 (define_expand "xst_len_r"
5656 [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
5657 (match_operand:DI 1 "register_operand" "b")
5658 (match_operand:DI 2 "register_operand" "r")]
5659 "UNSPEC_XST_LEN_R"
5660 {
5661 rtx shift_mask = gen_reg_rtx (V16QImode);
5662 rtx rtx_vtmp = gen_reg_rtx (V16QImode);
5663 rtx tmp = gen_reg_rtx (DImode);
5664
5665 emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2]));
5666 emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
5667 shift_mask));
5668 emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
5669 emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
5670 DONE;
5671 })
5672
5673 ;; Vector Compare Not Equal Byte (specified/not+eq:)
5674 (define_insn "vcmpneb"
5675 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5676 (not:V16QI
5677 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
5678 (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
5679 "TARGET_P9_VECTOR"
5680 "vcmpneb %0,%1,%2"
5681 [(set_attr "type" "vecsimple")])
5682
5683 ;; Vector Compare Not Equal v1ti (specified/not+eq:)
5684 (define_expand "vcmpnet"
5685 [(set (match_operand:V1TI 0 "altivec_register_operand")
5686 (not:V1TI
5687 (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand")
5688 (match_operand:V1TI 2 "altivec_register_operand"))))]
5689 "TARGET_POWER10"
5690 {
5691 emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2]));
5692 emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0]));
5693 DONE;
5694 })
5695
5696 ;; Vector Compare Not Equal or Zero Byte
5697 (define_insn "vcmpnezb"
5698 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5699 (unspec:V16QI
5700 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5701 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5702 UNSPEC_VCMPNEZB))]
5703 "TARGET_P9_VECTOR"
5704 "vcmpnezb %0,%1,%2"
5705 [(set_attr "type" "vecsimple")])
5706
5707 ;; Vector Compare Not Equal or Zero Byte predicate (record form)
5708 (define_insn "vcmpnezb_p"
5709 [(set (reg:CC CR6_REGNO)
5710 (unspec:CC
5711 [(match_operand:V16QI 1 "altivec_register_operand" "v")
5712 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5713 UNSPEC_VCMPNEZB))
5714 (set (match_operand:V16QI 0 "altivec_register_operand" "=v")
5715 (unspec:V16QI
5716 [(match_dup 1)
5717 (match_dup 2)]
5718 UNSPEC_VCMPNEZB))]
5719 "TARGET_P9_VECTOR"
5720 "vcmpnezb. %0,%1,%2"
5721 [(set_attr "type" "vecsimple")])
5722
5723 ;; Vector Compare Not Equal Half Word (specified/not+eq:)
5724 (define_insn "vcmpneh"
5725 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5726 (not:V8HI
5727 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
5728 (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
5729 "TARGET_P9_VECTOR"
5730 "vcmpneh %0,%1,%2"
5731 [(set_attr "type" "vecsimple")])
5732
5733 ;; Vector Compare Not Equal or Zero Half Word
5734 (define_insn "vcmpnezh"
5735 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
5736 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
5737 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5738 UNSPEC_VCMPNEZH))]
5739 "TARGET_P9_VECTOR"
5740 "vcmpnezh %0,%1,%2"
5741 [(set_attr "type" "vecsimple")])
5742
5743 ;; Vector Compare Not Equal Word (specified/not+eq:)
5744 (define_insn "vcmpnew"
5745 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5746 (not:V4SI
5747 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
5748 (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
5749 "TARGET_P9_VECTOR"
5750 "vcmpnew %0,%1,%2"
5751 [(set_attr "type" "vecsimple")])
5752
5753 ;; Vector Compare Not Equal or Zero Word
5754 (define_insn "vcmpnezw"
5755 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
5756 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
5757 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5758 UNSPEC_VCMPNEZW))]
5759 "TARGET_P9_VECTOR"
5760 "vcmpnezw %0,%1,%2"
5761 [(set_attr "type" "vecsimple")])
5762
5763 ;; Vector Count Leading Zero Least-Significant Bits Byte
5764 (define_insn "vclzlsbb_<mode>"
5765 [(set (match_operand:SI 0 "register_operand" "=r")
5766 (unspec:SI
5767 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5768 UNSPEC_VCLZLSBB))]
5769 "TARGET_P9_VECTOR"
5770 "vclzlsbb %0,%1"
5771 [(set_attr "type" "vecsimple")])
5772
5773 ;; Vector Count Trailing Zero Least-Significant Bits Byte
5774 (define_insn "vctzlsbb_<mode>"
5775 [(set (match_operand:SI 0 "register_operand" "=r")
5776 (unspec:SI
5777 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
5778 UNSPEC_VCTZLSBB))]
5779 "TARGET_P9_VECTOR"
5780 "vctzlsbb %0,%1"
5781 [(set_attr "type" "vecsimple")])
5782
5783 ;; Vector Extract Unsigned Byte Left-Indexed
5784 (define_insn "vextublx"
5785 [(set (match_operand:SI 0 "register_operand" "=r")
5786 (unspec:SI
5787 [(match_operand:SI 1 "register_operand" "r")
5788 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5789 UNSPEC_VEXTUBLX))]
5790 "TARGET_P9_VECTOR"
5791 "vextublx %0,%1,%2"
5792 [(set_attr "type" "vecsimple")])
5793
5794 ;; Vector Extract Unsigned Byte Right-Indexed
5795 (define_insn "vextubrx"
5796 [(set (match_operand:SI 0 "register_operand" "=r")
5797 (unspec:SI
5798 [(match_operand:SI 1 "register_operand" "r")
5799 (match_operand:V16QI 2 "altivec_register_operand" "v")]
5800 UNSPEC_VEXTUBRX))]
5801 "TARGET_P9_VECTOR"
5802 "vextubrx %0,%1,%2"
5803 [(set_attr "type" "vecsimple")])
5804
5805 ;; Vector Extract Unsigned Half Word Left-Indexed
5806 (define_insn "vextuhlx"
5807 [(set (match_operand:SI 0 "register_operand" "=r")
5808 (unspec:SI
5809 [(match_operand:SI 1 "register_operand" "r")
5810 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5811 UNSPEC_VEXTUHLX))]
5812 "TARGET_P9_VECTOR"
5813 "vextuhlx %0,%1,%2"
5814 [(set_attr "type" "vecsimple")])
5815
5816 ;; Vector Extract Unsigned Half Word Right-Indexed
5817 (define_insn "vextuhrx"
5818 [(set (match_operand:SI 0 "register_operand" "=r")
5819 (unspec:SI
5820 [(match_operand:SI 1 "register_operand" "r")
5821 (match_operand:V8HI 2 "altivec_register_operand" "v")]
5822 UNSPEC_VEXTUHRX))]
5823 "TARGET_P9_VECTOR"
5824 "vextuhrx %0,%1,%2"
5825 [(set_attr "type" "vecsimple")])
5826
5827 ;; Vector Extract Unsigned Word Left-Indexed
5828 (define_insn "vextuwlx"
5829 [(set (match_operand:SI 0 "register_operand" "=r")
5830 (unspec:SI
5831 [(match_operand:SI 1 "register_operand" "r")
5832 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5833 UNSPEC_VEXTUWLX))]
5834 "TARGET_P9_VECTOR"
5835 "vextuwlx %0,%1,%2"
5836 [(set_attr "type" "vecsimple")])
5837
5838 ;; Vector Extract Unsigned Word Right-Indexed
5839 (define_insn "vextuwrx"
5840 [(set (match_operand:SI 0 "register_operand" "=r")
5841 (unspec:SI
5842 [(match_operand:SI 1 "register_operand" "r")
5843 (match_operand:V4SI 2 "altivec_register_operand" "v")]
5844 UNSPEC_VEXTUWRX))]
5845 "TARGET_P9_VECTOR"
5846 "vextuwrx %0,%1,%2"
5847 [(set_attr "type" "vecsimple")])
5848
5849 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
5850 ;; endian version needs to adjust the byte number and the V4SI element in
5851 ;; insert4b.
5852 (define_insn "extract4b"
5853 [(set (match_operand:V2DI 0 "vsx_register_operand")
5854 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
5855 (match_operand:QI 2 "const_0_to_12_operand" "n")]
5856 UNSPEC_XXEXTRACTUW))]
5857 "TARGET_P9_VECTOR"
5858 {
5859 if (!BYTES_BIG_ENDIAN)
5860 operands[2] = GEN_INT (12 - INTVAL (operands[2]));
5861
5862 return "xxextractuw %x0,%x1,%2";
5863 })
5864
5865 (define_expand "insert4b"
5866 [(set (match_operand:V16QI 0 "vsx_register_operand")
5867 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
5868 (match_operand:V16QI 2 "vsx_register_operand")
5869 (match_operand:QI 3 "const_0_to_12_operand")]
5870 UNSPEC_XXINSERTW))]
5871 "TARGET_P9_VECTOR"
5872 {
5873 if (!BYTES_BIG_ENDIAN)
5874 {
5875 rtx op1 = operands[1];
5876 rtx v4si_tmp = gen_reg_rtx (V4SImode);
5877 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
5878 operands[1] = v4si_tmp;
5879 operands[3] = GEN_INT (12 - INTVAL (operands[3]));
5880 }
5881 })
5882
5883 (define_insn "*insert4b_internal"
5884 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
5885 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
5886 (match_operand:V16QI 2 "vsx_register_operand" "0")
5887 (match_operand:QI 3 "const_0_to_12_operand" "n")]
5888 UNSPEC_XXINSERTW))]
5889 "TARGET_P9_VECTOR"
5890 "xxinsertw %x0,%x1,%3"
5891 [(set_attr "type" "vecperm")])
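;; These patterns serve the vec_insert4b/vec_extract4b built-ins, e.g.
;; (a sketch, assuming the documented ISA 3.0 built-ins; the byte offset
;; of 4 here is just this example's choice):
;;
;;   #include <altivec.h>
;;
;;   vector unsigned char put_word (vector signed int src,
;;                                  vector unsigned char dst)
;;   {
;;     return vec_insert4b (src, dst, 4);   /* xxinsertw at byte offset 4 */
;;   }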
5892
5893
5894 ;; Extract four float32 values from the left four elements of an eight-element
5895 ;; vector of float16 values.
5896 (define_expand "vextract_fp_from_shorth"
5897 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5898 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5899 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
5900 "TARGET_P9_VECTOR"
5901 {
5902 int i;
5903 int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
5904 int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
5905
5906 rtx rvals[16];
5907 rtx mask = gen_reg_rtx (V16QImode);
5908 rtx tmp = gen_reg_rtx (V16QImode);
5909 rtvec v;
5910
5911 for (i = 0; i < 16; i++)
5912 if (!BYTES_BIG_ENDIAN)
5913 rvals[i] = GEN_INT (vals_le[i]);
5914 else
5915 rvals[i] = GEN_INT (vals_be[i]);
5916
5917 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5918 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5919 src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
5920 conversion instruction. */
5921 v = gen_rtvec_v (16, rvals);
5922 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5923 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5924 operands[1], mask));
5925 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5926 DONE;
5927 })
5928
5929 ;; Extract four float32 values from the right four elements of an eight-element
5930 ;; vector of float16 values.
5931 (define_expand "vextract_fp_from_shortl"
5932 [(set (match_operand:V4SF 0 "register_operand" "=wa")
5933 (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
5934 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
5935 "TARGET_P9_VECTOR"
5936 {
5937 int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
5938 int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
5939
5940 int i;
5941 rtx rvals[16];
5942 rtx mask = gen_reg_rtx (V16QImode);
5943 rtx tmp = gen_reg_rtx (V16QImode);
5944 rtvec v;
5945
5946 for (i = 0; i < 16; i++)
5947 if (!BYTES_BIG_ENDIAN)
5948 rvals[i] = GEN_INT (vals_le[i]);
5949 else
5950 rvals[i] = GEN_INT (vals_be[i]);
5951
5952 /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
5953 inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move
5954 src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
5955 conversion instruction. */
5956 v = gen_rtvec_v (16, rvals);
5957 emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
5958 emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
5959 operands[1], mask));
5960 emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
5961 DONE;
5962 })
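;; Both expanders back the vec_extract_fp32_from_short{h,l} built-ins,
;; e.g. (a sketch, assuming the documented ISA 3.0 names):
;;
;;   #include <altivec.h>
;;
;;   vector float left_halves (vector unsigned short v)
;;   {
;;     return vec_extract_fp32_from_shorth (v);   /* vperm + xvcvhpsp */
;;   }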
5963
5964 ;; Support for ISA 3.0 vector byte reverse
5965
5966 ;; Swap all bytes within a vector
5967 (define_insn "p9_xxbrq_v1ti"
5968 [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
5969 (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
5970 "TARGET_P9_VECTOR"
5971 "xxbrq %x0,%x1"
5972 [(set_attr "type" "vecperm")])
5973
5974 (define_expand "p9_xxbrq_v16qi"
5975 [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
5976 (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
5977 "TARGET_P9_VECTOR"
5978 {
5979 rtx op0 = gen_reg_rtx (V1TImode);
5980 rtx op1 = gen_lowpart (V1TImode, operands[1]);
5981 emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
5982 emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
5983 DONE;
5984 })
5985
5986 ;; Swap all bytes in each 64-bit element
5987 (define_insn "p9_xxbrd_v2di"
5988 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
5989 (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
5990 "TARGET_P9_VECTOR"
5991 "xxbrd %x0,%x1"
5992 [(set_attr "type" "vecperm")])
5993
5994 (define_expand "p9_xxbrd_v2df"
5995 [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
5996 (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
5997 "TARGET_P9_VECTOR"
5998 {
5999 rtx op0 = gen_reg_rtx (V2DImode);
6000 rtx op1 = gen_lowpart (V2DImode, operands[1]);
6001 emit_insn (gen_p9_xxbrd_v2di (op0, op1));
6002 emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
6003 DONE;
6004 })
6005
6006 ;; Swap all bytes in each 32-bit element
6007 (define_insn "p9_xxbrw_v4si"
6008 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
6009 (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
6010 "TARGET_P9_VECTOR"
6011 "xxbrw %x0,%x1"
6012 [(set_attr "type" "vecperm")])
6013
6014 (define_expand "p9_xxbrw_v4sf"
6015 [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
6016 (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
6017 "TARGET_P9_VECTOR"
6018 {
6019 rtx op0 = gen_reg_rtx (V4SImode);
6020 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6021 emit_insn (gen_p9_xxbrw_v4si (op0, op1));
6022 emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
6023 DONE;
6024 })
6025
6026 ;; Swap all bytes in each element of the vector
6027 (define_expand "revb_<mode>"
6028 [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
6029 (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
6030 ""
6031 {
6032 if (TARGET_P9_VECTOR)
6033 emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
6034 else
6035 {
6036 if (<MODE>mode == V8HImode)
6037 {
6038 rtx splt = gen_reg_rtx (V8HImode);
6039 emit_insn (gen_altivec_vspltish (splt, GEN_INT (8)));
6040 emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt));
6041 }
6042 else
6043 {
6044 /* Want to have the elements in reverse order relative
6045 to the endian mode in use, i.e. in LE mode, put elements
6046 in BE order. */
6047 rtx sel = swap_endian_selector_for_mode (<MODE>mode);
6048 emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], operands[1],
6049 operands[1], sel));
6050 }
6051 }
6052
6053 DONE;
6054 })
6055
6056 ;; Reversing bytes in vector char is just a NOP.
6057 (define_expand "revb_v16qi"
6058 [(set (match_operand:V16QI 0 "vsx_register_operand")
6059 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
6060 ""
6061 {
6062 emit_move_insn (operands[0], operands[1]);
6063 DONE;
6064 })
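;; The revb expanders implement the vec_revb built-in, e.g. (a sketch,
;; assuming the documented built-in; uses xxbrw on ISA 3.0, vperm or
;; vrlh on earlier systems):
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int byteswap_each (vector unsigned int v)
;;   {
;;     return vec_revb (v);   /* swap the bytes within each element */
;;   }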
6065
6066 ;; Swap all bytes in each 16-bit element
6067 (define_insn "p9_xxbrh_v8hi"
6068 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
6069 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
6070 "TARGET_P9_VECTOR"
6071 "xxbrh %x0,%x1"
6072 [(set_attr "type" "vecperm")])
6073 \f
6074
6075 ;; Operand numbers for the following peephole2
6076 (define_constants
6077 [(SFBOOL_TMP_GPR 0) ;; GPR temporary
6078 (SFBOOL_TMP_VSX 1) ;; vector temporary
6079 (SFBOOL_MFVSR_D 2) ;; move to gpr dest
6080 (SFBOOL_MFVSR_A 3) ;; move to gpr src
6081 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest
6082 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1
6083 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2
6084 (SFBOOL_SHL_D 7) ;; shift left dest
6085 (SFBOOL_SHL_A 8) ;; shift left arg
6086 (SFBOOL_MTVSR_D 9) ;; move to vector dest
6087 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode
6088 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode
6089 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode
6090 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode
6091
6092 ;; Attempt to optimize some common GLIBC operations using logical operations to
6093 ;; pick apart SFmode operations. For example, there is code from e_powf.c
6094 ;; after macro expansion that looks like:
6095 ;;
6096 ;; typedef union {
6097 ;; float value;
6098 ;; uint32_t word;
6099 ;; } ieee_float_shape_type;
6100 ;;
6101 ;; float t1;
6102 ;; int32_t is;
6103 ;;
6104 ;; do {
6105 ;; ieee_float_shape_type gf_u;
6106 ;; gf_u.value = (t1);
6107 ;; (is) = gf_u.word;
6108 ;; } while (0);
6109 ;;
6110 ;; do {
6111 ;; ieee_float_shape_type sf_u;
6112 ;; sf_u.word = (is & 0xfffff000);
6113 ;; (t1) = sf_u.value;
6114 ;; } while (0);
6115 ;;
6116 ;;
6117 ;; This would result in two direct move operations (convert to memory format,
6118 ;; direct move to GPR, do the AND operation, direct move to VSX, convert to
6119 ;; scalar format). With this peephole, we eliminate the direct move to the
6120 ;; GPR, and instead move the integer mask value to the vector register after a
6121 ;; shift and do the VSX logical operation.
6122
6123 ;; The insns for dealing with SFmode in GPR registers look like:
6124 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
6125 ;;
6126 ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
6127 ;;
6128 ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
6129 ;;
6130 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
6131 ;;
6132 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
6133 ;;
6134 ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
6135
6136 (define_peephole2
6137 [(match_scratch:DI SFBOOL_TMP_GPR "r")
6138 (match_scratch:V4SF SFBOOL_TMP_VSX "wa")
6139
6140 ;; MFVSRWZ (aka zero_extend)
6141 (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
6142 (zero_extend:DI
6143 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))
6144
6145 ;; AND/IOR/XOR operation on int
6146 (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
6147 (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
6148 (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))
6149
6150 ;; SLDI
6151 (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
6152 (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
6153 (const_int 32)))
6154
6155 ;; MTVSRD
6156 (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
6157 (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
6158
6159 "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
6160 /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
6161 to compare registers, when the mode is different. */
6162 && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
6163 && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
6164 && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
6165 && (REG_P (operands[SFBOOL_BOOL_A2])
6166 || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
6167 && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
6168 || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
6169 && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
6170 || (REG_P (operands[SFBOOL_BOOL_A2])
6171 && REGNO (operands[SFBOOL_MFVSR_D])
6172 == REGNO (operands[SFBOOL_BOOL_A2])))
6173 && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
6174 && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
6175 || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
6176 && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
6177 [(set (match_dup SFBOOL_TMP_GPR)
6178 (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
6179 (const_int 32)))
6180
6181 (set (match_dup SFBOOL_TMP_VSX_DI)
6182 (match_dup SFBOOL_TMP_GPR))
6183
6184 (set (match_dup SFBOOL_MTVSR_D_V4SF)
6185 (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
6186 (match_dup SFBOOL_TMP_VSX)))]
6187 {
6188 rtx bool_a1 = operands[SFBOOL_BOOL_A1];
6189 rtx bool_a2 = operands[SFBOOL_BOOL_A2];
6190 int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
6191 int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
6192 int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
6193 int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);
6194
6195 if (CONST_INT_P (bool_a2))
6196 {
6197 rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
6198 emit_move_insn (tmp_gpr, bool_a2);
6199 operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
6200 }
6201 else
6202 {
6203 int regno_bool_a1 = REGNO (bool_a1);
6204 int regno_bool_a2 = REGNO (bool_a2);
6205 int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
6206 ? regno_bool_a2 : regno_bool_a1);
6207 operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
6208 }
6209
6210 operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
6211 operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
6212 operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
6213 })
6214
6215 ;; Support signed/unsigned long long to float conversion vectorization.
6216 ;; Note that any_float (pc) here is just for code attribute <su>.
6217 (define_expand "vec_pack<su>_float_v2di"
6218 [(match_operand:V4SF 0 "vfloat_operand")
6219 (match_operand:V2DI 1 "vint_operand")
6220 (match_operand:V2DI 2 "vint_operand")
6221 (any_float (pc))]
6222 "TARGET_VSX"
6223 {
6224 rtx r1 = gen_reg_rtx (V4SFmode);
6225 rtx r2 = gen_reg_rtx (V4SFmode);
6226 emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
6227 emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
6228 rs6000_expand_extract_even (operands[0], r1, r2);
6229 DONE;
6230 })
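;; The vectorizer reaches vec_pack<su>_float_v2di from loops that narrow
;; 64-bit integers to float, e.g. (a plain C sketch):
;;
;;   void narrow (float *restrict out, long long *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];   /* two V2DI inputs pack into one V4SF */
;;   }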
6231
6232 ;; Support float to signed/unsigned long long conversion vectorization.
6233 ;; Note that any_fix (pc) here is just for code attribute <su>.
6234 (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
6235 [(match_operand:V2DI 0 "vint_operand")
6236 (match_operand:V4SF 1 "vfloat_operand")
6237 (any_fix (pc))]
6238 "TARGET_VSX"
6239 {
6240 rtx reg = gen_reg_rtx (V4SFmode);
6241 rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
6242 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
6243 DONE;
6244 })
6245
6246 ;; Note that any_fix (pc) here is just for code attribute <su>.
6247 (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
6248 [(match_operand:V2DI 0 "vint_operand")
6249 (match_operand:V4SF 1 "vfloat_operand")
6250 (any_fix (pc))]
6251 "TARGET_VSX"
6252 {
6253 rtx reg = gen_reg_rtx (V4SFmode);
6254 rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
6255 emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
6256 DONE;
6257 })
6258
6259 (define_insn "vsx_<xvcvbf16>"
6260 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
6261 (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
6262 XVCVBF16))]
6263 "TARGET_POWER10"
6264 "<xvcvbf16> %x0,%x1"
6265 [(set_attr "type" "vecfloat")])
6266
6267 (define_insn "vec_mtvsrbmi"
6268 [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
6269 (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
6270 UNSPEC_MTVSBM))]
6271 "TARGET_POWER10"
6272 "mtvsrbmi %0,%1"
6273 )
6274
6275 (define_insn "vec_mtvsr_<mode>"
6276 [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
6277 (unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
6278 UNSPEC_MTVSBM))]
6279 "TARGET_POWER10"
6280 "mtvsr<wd>m %0,%1";
6281 [(set_attr "type" "vecsimple")])
6282
6283 (define_insn "vec_cntmb_<mode>"
6284 [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
6285 (unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v")
6286 (match_operand:QI 2 "const_0_to_1_operand" "n")]
6287 UNSPEC_VCNTMB))]
6288 "TARGET_POWER10"
6289 "vcntmb<wd> %0,%1,%2"
6290 [(set_attr "type" "vecsimple")])
6291
6292 (define_insn "vec_extract_<mode>"
6293 [(set (match_operand:SI 0 "register_operand" "=r")
6294 (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")]
6295 UNSPEC_VEXTRACT))]
6296 "TARGET_POWER10"
6297 "vextract<wd>m %0,%1"
6298 [(set_attr "type" "vecsimple")])
6299
6300 (define_insn "vec_expand_<mode>"
6301 [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v")
6302 (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")]
6303 UNSPEC_VEXPAND))]
6304 "TARGET_POWER10"
6305 "vexpand<wd>m %0,%1"
6306 [(set_attr "type" "vecsimple")])
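;; These mask insns back built-ins such as vec_genbm, vec_cntm,
;; vec_extractm and vec_expandm (a sketch, assuming the documented
;; ISA 3.1 built-ins):
;;
;;   #include <altivec.h>
;;
;;   unsigned long long count_ones (vector unsigned char mask)
;;   {
;;     return vec_cntm (mask, 1);   /* vcntmbb: count elements whose
;;                                     mask bit is 1 */
;;   }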
6307
6308 (define_insn "dives_<mode>"
6309 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6310 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6311 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6312 UNSPEC_VDIVES))]
6313 "TARGET_POWER10"
6314 "vdives<wd> %0,%1,%2"
6315 [(set_attr "type" "vecdiv")
6316 (set_attr "size" "<bits>")])
6317
6318 (define_insn "diveu_<mode>"
6319 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6320 (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v")
6321 (match_operand:VIlong 2 "vsx_register_operand" "v")]
6322 UNSPEC_VDIVEU))]
6323 "TARGET_POWER10"
6324 "vdiveu<wd> %0,%1,%2"
6325 [(set_attr "type" "vecdiv")
6326 (set_attr "size" "<bits>")])
6327
6328 (define_insn "div<mode>3"
6329 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6330 (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6331 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6332 "TARGET_POWER10"
6333 "vdivs<wd> %0,%1,%2"
6334 [(set_attr "type" "vecdiv")
6335 (set_attr "size" "<bits>")])
6336
6337 (define_insn "udiv<mode>3"
6338 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6339 (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6340 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6341 "TARGET_POWER10"
6342 "vdivu<wd> %0,%1,%2"
6343 [(set_attr "type" "vecdiv")
6344 (set_attr "size" "<bits>")])
6345
6346 (define_insn "mod<mode>3"
6347 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6348 (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6349 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6350 "TARGET_POWER10"
6351 "vmods<wd> %0,%1,%2"
6352 [(set_attr "type" "vecdiv")
6353 (set_attr "size" "<bits>")])
6354
6355 (define_insn "umod<mode>3"
6356 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6357 (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
6358 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
6359 "TARGET_POWER10"
6360 "vmodu<wd> %0,%1,%2"
6361 [(set_attr "type" "vecdiv")
6362 (set_attr "size" "<bits>")])
6363
6364 (define_insn "smul<mode>3_highpart"
6365 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6366 (mult:VIlong (ashiftrt
6367 (match_operand:VIlong 1 "vsx_register_operand" "v")
6368 (const_int 32))
6369 (ashiftrt
6370 (match_operand:VIlong 2 "vsx_register_operand" "v")
6371 (const_int 32))))]
6372 "TARGET_POWER10"
6373 "vmulhs<wd> %0,%1,%2"
6374 [(set_attr "type" "veccomplex")])
6375
6376 (define_insn "umul<mode>3_highpart"
6377 [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
6378 (us_mult:VIlong (ashiftrt
6379 (match_operand:VIlong 1 "vsx_register_operand" "v")
6380 (const_int 32))
6381 (ashiftrt
6382 (match_operand:VIlong 2 "vsx_register_operand" "v")
6383 (const_int 32))))]
6384 "TARGET_POWER10"
6385 "vmulhu<wd> %0,%1,%2"
6386 [(set_attr "type" "veccomplex")])
6387
6388 ;; Vector multiply low double word
6389 (define_insn "mulv2di3"
6390 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
6391 (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v")
6392 (match_operand:V2DI 2 "vsx_register_operand" "v")))]
6393 "TARGET_POWER10"
6394 "vmulld %0,%1,%2"
6395 [(set_attr "type" "veccomplex")])
6396
6397 \f
6398 ;; XXSPLTIW built-in function support
6399 (define_insn "xxspltiw_v4si"
6400 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6401 (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")]
6402 UNSPEC_XXSPLTIW))]
6403 "TARGET_POWER10"
6404 "xxspltiw %x0,%1"
6405 [(set_attr "type" "vecperm")
6406 (set_attr "prefixed" "yes")])
6407
6408 (define_expand "xxspltiw_v4sf"
6409 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6410 (unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")]
6411 UNSPEC_XXSPLTIW))]
6412 "TARGET_POWER10"
6413 {
6414 long value = rs6000_const_f32_to_i32 (operands[1]);
6415 emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value)));
6416 DONE;
6417 })
6418
6419 (define_insn "xxspltiw_v4sf_inst"
6420 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6421 (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
6422 UNSPEC_XXSPLTIW))]
6423 "TARGET_POWER10"
6424 "xxspltiw %x0,%1"
6425 [(set_attr "type" "vecperm")
6426 (set_attr "prefixed" "yes")])
6427
6428 ;; XXSPLTIDP built-in function support
6429 (define_expand "xxspltidp_v2df"
6430   [(set (match_operand:V2DF 0 "register_operand")
6431 (unspec:V2DF [(match_operand:SF 1 "const_double_operand")]
6432 UNSPEC_XXSPLTIDP))]
6433 "TARGET_POWER10"
6434 {
6435 long value = rs6000_const_f32_to_i32 (operands[1]);
6436 rs6000_emit_xxspltidp_v2df (operands[0], value);
6437 DONE;
6438 })
6439
6440 (define_insn "xxspltidp_v2df_inst"
6441 [(set (match_operand:V2DF 0 "register_operand" "=wa")
6442 (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")]
6443 UNSPEC_XXSPLTIDP))]
6444 "TARGET_POWER10"
6445 "xxspltidp %x0,%1"
6446 [(set_attr "type" "vecperm")
6447 (set_attr "prefixed" "yes")])
6448
6449 ;; XXSPLTI32DX built-in function support
6450 (define_expand "xxsplti32dx_v4si"
6451 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6452 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6453 (match_operand:QI 2 "u1bit_cint_operand" "n")
6454 (match_operand:SI 3 "s32bit_cint_operand" "n")]
6455 UNSPEC_XXSPLTI32DX))]
6456 "TARGET_POWER10"
6457 {
6458 int index = INTVAL (operands[2]);
6459
6460 if (!BYTES_BIG_ENDIAN)
6461 index = 1 - index;
6462
6463 emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], operands[1],
6464 GEN_INT (index), operands[3]));
6465 DONE;
6466 }
6467 [(set_attr "type" "vecperm")])
6468
6469 (define_insn "xxsplti32dx_v4si_inst"
6470 [(set (match_operand:V4SI 0 "register_operand" "=wa")
6471 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6472 (match_operand:QI 2 "u1bit_cint_operand" "n")
6473 (match_operand:SI 3 "s32bit_cint_operand" "n")]
6474 UNSPEC_XXSPLTI32DX))]
6475 "TARGET_POWER10"
6476 "xxsplti32dx %x0,%2,%3"
6477 [(set_attr "type" "vecperm")
6478 (set_attr "prefixed" "yes")])
6479
6480 (define_expand "xxsplti32dx_v4sf"
6481 [(set (match_operand:V4SF 0 "register_operand" "=wa")
6482 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
6483 (match_operand:QI 2 "u1bit_cint_operand" "n")
6484 (match_operand:SF 3 "const_double_operand" "n")]
6485 UNSPEC_XXSPLTI32DX))]
6486 "TARGET_POWER10"
6487 {
6488 int index = INTVAL (operands[2]);
6489 long value = rs6000_const_f32_to_i32 (operands[3]);
6490 if (!BYTES_BIG_ENDIAN)
6491 index = 1 - index;
6492
6493 emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], operands[1],
6494 GEN_INT (index), GEN_INT (value)));
6495 DONE;
6496 })

(define_insn "xxsplti32dx_v4sf_inst"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
		      (match_operand:QI 2 "u1bit_cint_operand" "n")
		      (match_operand:SI 3 "s32bit_cint_operand" "n")]
		     UNSPEC_XXSPLTI32DX))]
  "TARGET_POWER10"
  "xxsplti32dx %x0,%2,%3"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
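
;; Illustrative use (hedged; assumes the PVIPR vec_splati_ins intrinsic maps
;; to these patterns):
;;   vector float vf2 = vec_splati_ins (vf, 0, 3.0f);
;; which should overwrite word 0 of each doubleword with the image of 3.0f
;; while leaving the other words of the input vector intact.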

;; XXBLEND built-in function support
(define_insn "xxblend_<mode>"
  [(set (match_operand:VM3 0 "register_operand" "=wa")
	(unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa")
		     (match_operand:VM3 2 "register_operand" "wa")
		     (match_operand:VM3 3 "register_operand" "wa")]
		    UNSPEC_XXBLEND))]
  "TARGET_POWER10"
  "xxblendv<VM3_char> %x0,%x1,%x2,%x3"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
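
;; XXBLENDV* selects on the most-significant bit of each element of the
;; third input: where that bit is set the result element comes from
;; operand 2, otherwise from operand 1.  A hedged sketch with the PVIPR
;; vec_blendv intrinsic:
;;   vector signed int r = vec_blendv (a, b, mask);   /* xxblendvw */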

;; XXPERMX built-in function support
(define_expand "xxpermx"
  [(set (match_operand:V2DI 0 "register_operand" "+wa")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
		      (match_operand:V2DI 2 "register_operand" "wa")
		      (match_operand:V16QI 3 "register_operand" "wa")
		      (match_operand:QI 4 "u8bit_cint_operand" "n")]
		     UNSPEC_XXPERMX))]
  "TARGET_POWER10"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_xxpermx_inst (operands[0], operands[1],
				 operands[2], operands[3],
				 operands[4]));
  else
    {
      /* Reverse the byte element indexes by XORing them with 0xFF.
	 Reverse the 32-byte section identifier match by subtracting
	 bits [0:2] of the element from 7.  */
      int value = INTVAL (operands[4]);
      rtx vreg = gen_reg_rtx (V16QImode);

      emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1)));
      emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg));
      value = 7 - value;
      emit_insn (gen_xxpermx_inst (operands[0], operands[2],
				   operands[1], operands[3],
				   GEN_INT (value)));
    }

  DONE;
}
  [(set_attr "type" "vecperm")])
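
;; Worked little-endian example (a sketch): for UIM = 2 the expander splats
;; -1 with xxspltib, XORs the permute control with it to flip the byte
;; indexes, swaps the two vector sources, and emits xxpermx with 7 - 2 = 5,
;; so the big-endian-numbered selector picks the bytes the user asked for
;; in LE element order.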

(define_insn "xxpermx_inst"
  [(set (match_operand:V2DI 0 "register_operand" "+v")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V16QI 3 "register_operand" "v")
		      (match_operand:QI 4 "u3bit_cint_operand" "n")]
		     UNSPEC_XXPERMX))]
  "TARGET_POWER10"
  "xxpermx %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])

;; XXEVAL built-in function support.  Note the %x output modifiers: with
;; "wa" constraints the operands can land in any VSX register, so the full
;; VSR numbers must be printed.
(define_insn "xxeval"
  [(set (match_operand:V2DI 0 "register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
		      (match_operand:V2DI 2 "register_operand" "wa")
		      (match_operand:V2DI 3 "register_operand" "wa")
		      (match_operand:QI 4 "u8bit_cint_operand" "n")]
		     UNSPEC_XXEVAL))]
  "TARGET_POWER10"
  "xxeval %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
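
;; XXEVAL computes an arbitrary three-input bitwise function: at each bit
;; position the three input bits form an index into the 8-bit immediate
;; truth table.  Hedged examples (assuming the immediate's truth-table bits
;; are numbered in big-endian order, which is worth double-checking against
;; the ISA):
;;   vec_ternarylogic (a, b, c, 1)      /* should be a & b & c */
;;   vec_ternarylogic (a, b, c, 0x7f)   /* should be a | b | c */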

;; Construct V1TI by vsx_concat_v2di
(define_split
  [(set (match_operand:V1TI 0 "vsx_register_operand")
	(subreg:V1TI
	 (match_operand:TI 1 "int_reg_operand") 0))]
  "TARGET_P9_VECTOR && !reload_completed"
  [(const_int 0)]
{
  rtx tmp1 = simplify_gen_subreg (DImode, operands[1], TImode, 0);
  rtx tmp2 = simplify_gen_subreg (DImode, operands[1], TImode, 8);
  rtx tmp3 = gen_reg_rtx (V2DImode);
  emit_insn (gen_vsx_concat_v2di (tmp3, tmp1, tmp2));
  rtx tmp4 = simplify_gen_subreg (V1TImode, tmp3, V2DImode, 0);
  emit_move_insn (operands[0], tmp4);
  DONE;
})
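
;; Rationale (a sketch): routing the move through vsx_concat_v2di lets the
;; two 64-bit halves of the TImode GPR pair be glued together directly in a
;; vector register (mtvsrdd on ISA 3.0), instead of pushing the 128-bit
;; value through memory.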

;; vmsumcud
(define_insn "vmsumcud"
  [(set (match_operand:V1TI 0 "register_operand" "+v")
	(unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V1TI 3 "register_operand" "v")]
		     UNSPEC_VMSUMCUD))]
  "TARGET_POWER10"
  "vmsumcud %0,%1,%2,%3"
  [(set_attr "type" "veccomplex")])
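
;; Semantics sketch (hedged): vmsumcud forms the two unsigned 64 x 64 -> 128
;; bit products of the doubleword elements, adds them and the 128-bit value
;; in operand 3, and returns the carry out of that sum; the PVIPR vec_msumc
;; intrinsic is assumed to map here:
;;   vector unsigned __int128 c = vec_msumc (a, b, acc);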