Index: gcc/config/rs6000/predicates.md =================================================================== --- gcc/config/rs6000/predicates.md (revision 226414) +++ gcc/config/rs6000/predicates.md (working copy) @@ -562,6 +562,14 @@ (define_predicate "easy_vector_constant_ return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode)); }) +;; Return true if this is an easy altivec constant that we form +;; by using VSLDOI. +(define_predicate "easy_vector_constant_vsldoi" + (and (match_code "const_vector") + (and (match_test "TARGET_ALTIVEC") + (and (match_test "easy_altivec_constant (op, mode)") + (match_test "vspltis_shifted (op) != 0"))))) + ;; Return 1 if operand is constant zero (scalars and vectors). (define_predicate "zero_constant" (and (match_code "const_int,const_double,const_wide_int,const_vector") Index: gcc/config/rs6000/rs6000-protos.h =================================================================== --- gcc/config/rs6000/rs6000-protos.h (revision 226413) +++ gcc/config/rs6000/rs6000-protos.h (working copy) @@ -31,6 +31,7 @@ extern void init_cumulative_args (CUMULA #endif /* TREE_CODE */ extern bool easy_altivec_constant (rtx, machine_mode); +extern int vspltis_shifted (rtx); extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int); extern bool macho_lo_sum_memory_operand (rtx, machine_mode); extern int num_insns_constant (rtx, machine_mode); Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 226414) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -5448,6 +5448,96 @@ vspltis_constant (rtx op, unsigned step, return true; } +/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI + instruction, filling in the bottom elements with 0 or -1. + + Return 0 if the constant cannot be generated with VSLDOI. Return positive + for the number of zeroes to shift in, or negative for the number of 0xff + bytes to shift in. + + OP is a CONST_VECTOR. 
*/ + +int +vspltis_shifted (rtx op) +{ + machine_mode mode = GET_MODE (op); + machine_mode inner = GET_MODE_INNER (mode); + + unsigned i, j; + unsigned nunits; + unsigned mask; + + HOST_WIDE_INT val; + + if (mode != V16QImode && mode != V8HImode && mode != V4SImode) + return false; + + /* We need to create pseudo registers to do the shift, so don't recognize + shift vector constants after reload. */ + if (!can_create_pseudo_p ()) + return false; + + nunits = GET_MODE_NUNITS (mode); + mask = GET_MODE_MASK (inner); + + val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1); + + /* Check if the value can really be the operand of a vspltis[bhw]. */ + if (EASY_VECTOR_15 (val)) + ; + + /* Also check if we are loading up the most significant bit which can be done + by loading up -1 and shifting the value left by -1. */ + else if (EASY_VECTOR_MSB (val, inner)) + ; + + else + return 0; + + /* Check if VAL is present in every element until we find elements + that are 0 or all 1 bits. */ + for (i = 1; i < nunits; ++i) + { + unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i; + HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); + + /* If the value isn't the splat value, check for the remaining elements + being 0/-1. */ + if (val != elt_val) + { + if (elt_val == 0) + { + for (j = i+1; j < nunits; ++j) + { + unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; + if (const_vector_elt_as_int (op, elt2) != 0) + return 0; + } + + return (nunits - i) * GET_MODE_SIZE (inner); + } + + else if ((elt_val & mask) == mask) + { + for (j = i+1; j < nunits; ++j) + { + unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; + if ((const_vector_elt_as_int (op, elt2) & mask) != mask) + return 0; + } + + return -((nunits - i) * GET_MODE_SIZE (inner)); + } + + else + return 0; + } + } + + /* If all elements are equal, we don't need to do VSLDOI. 
*/ + return 0; +} + /* Return true if OP is of the given MODE and can be synthesized with a vspltisb, vspltish or vspltisw. */ @@ -5512,6 +5602,9 @@ easy_altivec_constant (rtx op, machine_m if (vspltis_constant (op, step, copies)) return true; + if (vspltis_shifted (op) != 0) + return true; + return false; } @@ -5555,7 +5648,7 @@ gen_easy_altivec_constant (rtx op) const char * output_vec_const_move (rtx *operands) { - int cst, cst2; + int cst, cst2, shift; machine_mode mode; rtx dest, vec; @@ -5568,10 +5661,13 @@ output_vec_const_move (rtx *operands) if (zero_constant (vec, mode)) return "xxlxor %x0,%x0,%x0"; + if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode)) + return "xxlorc %x0,%x0,%x0"; + if ((mode == V2DImode || mode == V1TImode) && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1) - return "vspltisw %0,-1"; + return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1"; } if (TARGET_ALTIVEC) @@ -5580,6 +5676,11 @@ output_vec_const_move (rtx *operands) if (zero_constant (vec, mode)) return "vxor %0,%0,%0"; + /* Do we need to construct a value using VSLDOI? 
*/ + shift = vspltis_shifted (vec); + if (shift != 0) + return "#"; + splat_vec = gen_easy_altivec_constant (vec); gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE); operands[1] = XEXP (splat_vec, 0); Index: gcc/config/rs6000/rs6000.h =================================================================== --- gcc/config/rs6000/rs6000.h (revision 226414) +++ gcc/config/rs6000/rs6000.h (working copy) @@ -1980,7 +1980,7 @@ typedef struct rs6000_args && ((n) & 1) == 0) #define EASY_VECTOR_MSB(n,mode) \ - (((unsigned HOST_WIDE_INT)n) == \ + ((((unsigned HOST_WIDE_INT)n) & GET_MODE_MASK (mode)) == \ ((((unsigned HOST_WIDE_INT)GET_MODE_MASK (mode)) + 1) >> 1)) Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 226414) +++ gcc/config/rs6000/altivec.md (working copy) @@ -311,6 +311,47 @@ (define_split operands[4] = gen_rtx_PLUS (op_mode, operands[0], operands[0]); }) +(define_split + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_vsldoi" ""))] + "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode) && can_create_pseudo_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 0) + (unspec:VM [(match_dup 2) + (match_dup 4) + (match_dup 6)] + UNSPEC_VSLDOI))] +{ + rtx op1 = operands[1]; + int elt = (BYTES_BIG_ENDIAN) ? 0 : GET_MODE_NUNITS (mode) - 1; + HOST_WIDE_INT val = const_vector_elt_as_int (op1, elt); + rtx rtx_val = GEN_INT (val); + int shift = vspltis_shifted (op1); + int nunits = GET_MODE_NUNITS (mode); + int i; + + gcc_assert (shift != 0); + operands[2] = gen_reg_rtx (mode); + operands[3] = gen_rtx_CONST_VECTOR (mode, rtvec_alloc (nunits)); + operands[4] = gen_reg_rtx (mode); + + if (shift < 0) + { + operands[5] = CONSTM1_RTX (mode); + operands[6] = GEN_INT (-shift); + } + else + { + operands[5] = CONST0_RTX (mode); + operands[6] = GEN_INT (shift); + } + + /* Populate the constant vectors. 
*/ + for (i = 0; i < nunits; i++) + XVECEXP (operands[3], 0, i) = rtx_val; +}) + (define_insn "get_vrsave_internal" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))] Index: gcc/testsuite/gcc.target/powerpc/pr67071-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr67071-1.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr67071-1.c (revision 0) @@ -0,0 +1,34 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ +/* { dg-options "-mcpu=power6 -maltivec" } */ + +vector unsigned char +foo_char (void) +{ + return (vector unsigned char) { + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 + }; +} + +vector unsigned short +foo_short (void) +{ + return (vector unsigned short) { + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000 + }; +} + +vector unsigned int +foo_int (void) +{ + return (vector unsigned int) { + 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u, + }; +} + +/* { dg-final { scan-assembler-times "vspltisw" 3 } } */ +/* { dg-final { scan-assembler-times "vslb" 1 } } */ +/* { dg-final { scan-assembler-times "vslh" 1 } } */ +/* { dg-final { scan-assembler-times "vslw" 1 } } */ Index: gcc/testsuite/gcc.target/powerpc/pr67071-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr67071-2.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr67071-2.c (revision 0) @@ -0,0 +1,48 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ +/* { dg-options "-mcpu=power6 -maltivec" } */ + +vector unsigned char +foo_char (void) +{ + return (vector 
unsigned char) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +#else + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 +#endif + }; +} + +vector unsigned short +foo_short (void) +{ + return (vector unsigned short) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 +#else + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8000 +#endif + }; +} + +vector unsigned int +foo_int (void) +{ + return (vector unsigned int) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80000000u, 0x00000000u, 0x00000000u, 0x00000000u, +#else + 0x00000000u, 0x00000000u, 0x00000000u, 0x80000000u, +#endif + }; +} + +/* { dg-final { scan-assembler-times "vspltisw" 3 } } */ +/* { dg-final { scan-assembler-times "vsldoi" 3 } } */ +/* { dg-final { scan-assembler-times "vslb" 1 } } */ +/* { dg-final { scan-assembler-times "vslh" 1 } } */ +/* { dg-final { scan-assembler-times "vslw" 1 } } */ Index: gcc/testsuite/gcc.target/powerpc/pr67071-3.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr67071-3.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/pr67071-3.c (revision 0) @@ -0,0 +1,48 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */ +/* { dg-options "-mcpu=power6 -maltivec" } */ + + +vector unsigned char +foo_char (void) +{ + return (vector unsigned char) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +#else + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x80, 0x80 +#endif + }; +} + +vector 
unsigned short +foo_short (void) +{ + return (vector unsigned short) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x8000, 0x8000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff +#else + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8000, 0x8000 +#endif + }; +} + +vector unsigned int +foo_int (void) +{ + return (vector unsigned int) { +#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ + 0x80000000u, 0x80000000u, 0xffffffffu, 0xffffffffu, +#else + 0xffffffffu, 0xffffffffu, 0x80000000u, 0x80000000u, +#endif + }; +} + +/* { dg-final { scan-assembler-times "vslb" 1 } } */ +/* { dg-final { scan-assembler-times "vslh" 1 } } */ +/* { dg-final { scan-assembler-times "vslw" 1 } } */ +/* { dg-final { scan-assembler-times "vsldoi" 3 } } */