From 5aebfdad5836fb769478dfb68e280e29f0d00e5e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 10 Dec 2011 13:14:04 -0800 Subject: [PATCH] rs6000: Implement vec_perm_const for all vector ISAs * config/rs6000/altivec.md (altivec_vmrghb): Rewrite pattern as vec_select + vec_concat. (altivec_vmrghh, altivec_vmrghw, altivec_vmrghsf, altivec_vmrglb, altivec_vmrglh, altivec_vmrglw, altivec_vmrglsf): Likewise. (vec_perm_constv16qi): New. (vec_extract_evenv4si, vec_extract_evenv4sf, vpkuhum_nomode, vpkuwum_nomode, vec_extract_oddv8hi, vec_extract_oddv16qi, vec_interleave_high<mode>, vec_interleave_low<mode>): Remove. * config/rs6000/paired.md (paired_merge00): Rewrite pattern as vec_select + vec_concat. (paired_merge10, paired_merge01, paired_merge11): Likewise. (vec_perm_constv2sf): New. (vec_interleave_highv2sf, vec_interleave_lowv2sf, vec_extract_evenv2sf, vec_extract_oddv2sf): Remove. * config/rs6000/spe.md (spe_evmergehi): Rewrite pattern as vec_select + vec_concat. (spe_evmergehilo, spe_evmergelo, spe_evmergelohi): New. (vec_perm_constv2si): New. * config/rs6000/vector.md (vec_interleave_highv4sf, vec_interleave_lowv4sf, vec_interleave_high<mode>, vec_interleave_low<mode>): Remove. * config/rs6000/vsx.md (VS_double): New mode attribute. (UNSPEC_VSX_XXPERMDI): Remove. (vsx_xxpermdi_<mode>_1): Rewrite pattern as vec_select + vec_concat. (vsx_xxmrghw_<mode>, vsx_xxmrglw_<mode>): Likewise. (vsx_xxpermdi_<mode>): Change to expander. (vec_perm_const<mode>): New. (vsx_mergel_<mode>, vsx_mergeh_<mode>): New. * config/rs6000/predicates.md (const_0_to_1_operand): New. (const_2_to_3_operand): New. * config/rs6000/rs6000.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): New. (altivec_expand_vec_perm_const): New. (rs6000_expand_vec_perm_const_1, rs6000_expand_vec_perm_const): New. (rs6000_vectorize_vec_perm_const_ok): New. (rs6000_do_expand_vec_perm): New. (rs6000_expand_extract_even, rs6000_expand_interleave): New. * config/rs6000/rs6000-builtin.def (VEC_MERGE*): Update rtx codes. 
* config/rs6000/rs6000-modes.def: Add double-wide vector modes. * config/rs6000/rs6000-protos.h: Update. From-SVN: r182193 --- gcc/ChangeLog | 42 ++ gcc/config/rs6000/altivec.md | 381 ++++-------------- gcc/config/rs6000/paired.md | 116 ++---- gcc/config/rs6000/predicates.md | 10 + gcc/config/rs6000/rs6000-builtin.def | 8 +- gcc/config/rs6000/rs6000-modes.def | 10 +- gcc/config/rs6000/rs6000-protos.h | 4 + gcc/config/rs6000/rs6000.c | 325 ++++++++++++++- gcc/config/rs6000/spe.md | 57 +-- gcc/config/rs6000/vector.md | 74 +--- gcc/config/rs6000/vsx.md | 150 ++++--- gcc/testsuite/ChangeLog | 6 + .../gcc.target/powerpc/altivec-perm-1.c | 76 ++++ .../gcc.target/powerpc/altivec-perm-2.c | 19 + .../gcc.target/powerpc/altivec-perm-4.c | 13 + 15 files changed, 764 insertions(+), 527 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 885f535b671..b1cc087ac58 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,45 @@ +2011-12-10 Richard Henderson + + * config/rs6000/altivec.md (altivec_vmrghb): Rewrite pattern as + vec_select + vec_concat. + (altivec_vmrghh, altivec_vmrghw, altivec_vmrghsf, altivec_vmrglb, + altivec_vmrglh, altivec_vmrglw, altivec_vmrglsf): Likewise. + (vec_perm_constv16qi): New. + (vec_extract_evenv4si, vec_extract_evenv4sf, vpkuhum_nomode, + vpkuwum_nomode, vec_extract_oddv8hi, vec_extract_oddv16qi, + vec_interleave_high<mode>, vec_interleave_low<mode>): Remove. + * config/rs6000/paired.md (paired_merge00): Rewrite pattern as + vec_select + vec_concat. + (paired_merge10, paired_merge01, paired_merge11): Likewise. + (vec_perm_constv2sf): New. + (vec_interleave_highv2sf, vec_interleave_lowv2sf, + vec_extract_evenv2sf, vec_extract_oddv2sf): Remove. + * config/rs6000/spe.md (spe_evmergehi): Rewrite pattern as + vec_select + vec_concat. 
+ (spe_evmergehilo, spe_evmergelo, spe_evmergelohi): New. + (vec_perm_constv2si): New. + * config/rs6000/vector.md (vec_interleave_highv4sf, + vec_interleave_lowv4sf, vec_interleave_high<mode>, + vec_interleave_low<mode>): Remove. + * config/rs6000/vsx.md (VS_double): New mode attribute. + (UNSPEC_VSX_XXPERMDI): Remove. + (vsx_xxpermdi_<mode>_1): Rewrite pattern as vec_select + vec_concat. + (vsx_xxmrghw_<mode>, vsx_xxmrglw_<mode>): Likewise. + (vsx_xxpermdi_<mode>): Change to expander. + (vec_perm_const<mode>): New. + (vsx_mergel_<mode>, vsx_mergeh_<mode>): New. + * config/rs6000/predicates.md (const_0_to_1_operand): New. + (const_2_to_3_operand): New. + * config/rs6000/rs6000.c (TARGET_VECTORIZE_VEC_PERM_CONST_OK): New. + (altivec_expand_vec_perm_const): New. + (rs6000_expand_vec_perm_const_1, rs6000_expand_vec_perm_const): New. + (rs6000_vectorize_vec_perm_const_ok): New. + (rs6000_do_expand_vec_perm): New. + (rs6000_expand_extract_even, rs6000_expand_interleave): New. + * config/rs6000/rs6000-builtin.def (VEC_MERGE*): Update rtx codes. + * config/rs6000/rs6000-modes.def: Add double-wide vector modes. + * config/rs6000/rs6000-protos.h: Update. 
+ 2011-12-10 Richard Henderson * expr.c (expand_expr_real_2) [VEC_PERM_EXPR]: Avoid passing a diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index a3a8d77bc2e..54ca3694133 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -327,7 +327,7 @@ (define_insn "*altivec_addv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=v") (plus:V4SF (match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")))] + (match_operand:V4SF 2 "register_operand" "v")))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vaddfp %0,%1,%2" [(set_attr "type" "vecfloat")]) @@ -764,202 +764,112 @@ (define_insn "altivec_vmrghb" [(set (match_operand:V16QI 0 "register_operand" "=v") - (vec_merge:V16QI (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 8) - (const_int 1) - (const_int 9) - (const_int 2) - (const_int 10) - (const_int 3) - (const_int 11) - (const_int 4) - (const_int 12) - (const_int 5) - (const_int 13) - (const_int 6) - (const_int 14) - (const_int 7) - (const_int 15)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "v") - (parallel [(const_int 8) - (const_int 0) - (const_int 9) - (const_int 1) - (const_int 10) - (const_int 2) - (const_int 11) - (const_int 3) - (const_int 12) - (const_int 4) - (const_int 13) - (const_int 5) - (const_int 14) - (const_int 6) - (const_int 15) - (const_int 7)])) - (const_int 21845)))] + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 16) + (const_int 1) (const_int 17) + (const_int 2) (const_int 18) + (const_int 3) (const_int 19) + (const_int 4) (const_int 20) + (const_int 5) (const_int 21) + (const_int 6) (const_int 22) + (const_int 7) (const_int 23)])))] "TARGET_ALTIVEC" "vmrghb %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrghh" [(set (match_operand:V8HI 0 
"register_operand" "=v") - (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - (const_int 7)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "v") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (const_int 85)))] + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] "TARGET_ALTIVEC" "vmrghh %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrghw" [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_merge:V4SI (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" "vmrghw %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vmrghsf" [(set (match_operand:V4SF 0 "register_operand" "=v") - (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" 
"v") + (match_operand:V4SF 2 "register_operand" "v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "VECTOR_MEM_ALTIVEC_P (V4SFmode)" "vmrghw %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrglb" [(set (match_operand:V16QI 0 "register_operand" "=v") - (vec_merge:V16QI (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "v") - (parallel [(const_int 8) - (const_int 0) - (const_int 9) - (const_int 1) - (const_int 10) - (const_int 2) - (const_int 11) - (const_int 3) - (const_int 12) - (const_int 4) - (const_int 13) - (const_int 5) - (const_int 14) - (const_int 6) - (const_int 15) - (const_int 7)])) - (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 8) - (const_int 1) - (const_int 9) - (const_int 2) - (const_int 10) - (const_int 3) - (const_int 11) - (const_int 4) - (const_int 12) - (const_int 5) - (const_int 13) - (const_int 6) - (const_int 14) - (const_int 7) - (const_int 15)])) - (const_int 21845)))] + (vec_select:V16QI + (vec_concat:V32QI + (match_operand:V16QI 1 "register_operand" "v") + (match_operand:V16QI 2 "register_operand" "v")) + (parallel [(const_int 8) (const_int 24) + (const_int 9) (const_int 25) + (const_int 10) (const_int 26) + (const_int 11) (const_int 27) + (const_int 12) (const_int 28) + (const_int 13) (const_int 29) + (const_int 14) (const_int 30) + (const_int 15) (const_int 31)])))] "TARGET_ALTIVEC" "vmrglb %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrglh" [(set (match_operand:V8HI 0 "register_operand" "=v") - (vec_merge:V8HI (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "v") - (parallel [(const_int 4) - (const_int 0) - (const_int 5) - (const_int 1) - (const_int 6) - (const_int 2) - (const_int 7) - (const_int 3)])) - (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 4) - (const_int 1) - (const_int 5) - (const_int 2) - (const_int 6) - (const_int 3) - 
(const_int 7)])) - (const_int 85)))] + (vec_select:V8HI + (vec_concat:V16HI + (match_operand:V8HI 1 "register_operand" "v") + (match_operand:V8HI 2 "register_operand" "v")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] "TARGET_ALTIVEC" "vmrglh %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "altivec_vmrglw" [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_merge:V4SI - (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] + (vec_select:V4SI + (vec_concat:V8SI + (match_operand:V4SI 1 "register_operand" "v") + (match_operand:V4SI 2 "register_operand" "v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) (define_insn "*altivec_vmrglsf" [(set (match_operand:V4SF 0 "register_operand" "=v") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] + (vec_select:V4SF + (vec_concat:V8SF + (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "VECTOR_MEM_ALTIVEC_P (V4SFmode)" "vmrglw %0,%1,%2" [(set_attr "type" "vecperm")]) @@ -1332,7 +1242,7 @@ (define_insn "*altivec_vrfiz" [(set (match_operand:V4SF 0 "register_operand" "=v") - (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))] + (fix:V4SF (match_operand:V4SF 1 "register_operand" 
"v")))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vrfiz %0,%1" [(set_attr "type" "vecfloat")]) @@ -1366,6 +1276,19 @@ "TARGET_ALTIVEC" "") +(define_expand "vec_perm_constv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:V16QI 2 "register_operand" "") + (match_operand:V16QI 3 "" "")] + "TARGET_ALTIVEC" +{ + if (altivec_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + (define_insn "altivec_vrfip" ; ceil [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] @@ -1494,7 +1417,7 @@ (define_insn "altivec_vupkhsb" [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] UNSPEC_VUPKHSB))] "TARGET_ALTIVEC" "vupkhsb %0,%1" @@ -1502,7 +1425,7 @@ (define_insn "altivec_vupkhpx" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] UNSPEC_VUPKHPX))] "TARGET_ALTIVEC" "vupkhpx %0,%1" @@ -1510,7 +1433,7 @@ (define_insn "altivec_vupkhsh" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] UNSPEC_VUPKHSH))] "TARGET_ALTIVEC" "vupkhsh %0,%1" @@ -1518,7 +1441,7 @@ (define_insn "altivec_vupklsb" [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] + (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")] UNSPEC_VUPKLSB))] "TARGET_ALTIVEC" "vupklsb %0,%1" @@ -1526,7 +1449,7 @@ (define_insn "altivec_vupklpx" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] UNSPEC_VUPKLPX))] 
"TARGET_ALTIVEC" "vupklpx %0,%1" @@ -1534,7 +1457,7 @@ (define_insn "altivec_vupklsh" [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] + (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")] UNSPEC_VUPKLSH))] "TARGET_ALTIVEC" "vupklsh %0,%1" @@ -2429,140 +2352,6 @@ "stvrxl %1,%y0" [(set_attr "type" "vecstore")]) -;; ??? This is still used directly by vector.md -(define_expand "vec_extract_evenv4si" - [(set (match_operand:V4SI 0 "register_operand" "") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - UNSPEC_EXTEVEN_V4SI))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 18); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 19); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 26); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 27); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); - emit_insn (gen_altivec_vperm_v4si (operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -;; ??? 
This is still used directly by vector.md -(define_expand "vec_extract_evenv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "register_operand" "")] - UNSPEC_EXTEVEN_V4SF))] - "TARGET_ALTIVEC" - " -{ - rtx mask = gen_reg_rtx (V16QImode); - rtvec v = rtvec_alloc (16); - - RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, 0); - RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, 1); - RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, 2); - RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, 3); - RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, 8); - RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, 9); - RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, 10); - RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, 11); - RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, 16); - RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, 17); - RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, 18); - RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, 19); - RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, 24); - RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, 25); - RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, 26); - RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, 27); - emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v))); - emit_insn (gen_altivec_vperm_v4sf (operands[0], operands[1], operands[2], mask)); - - DONE; -}") - -(define_insn "vpkuhum_nomode" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand 1 "register_operand" "v") - (match_operand 2 "register_operand" "v")] - UNSPEC_VPKUHUM))] - "TARGET_ALTIVEC" - "vpkuhum %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_insn "vpkuwum_nomode" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand 1 "register_operand" "v") - (match_operand 2 "register_operand" "v")] - UNSPEC_VPKUWUM))] - "TARGET_ALTIVEC" - "vpkuwum %0,%1,%2" - [(set_attr "type" "vecperm")]) - -(define_expand "vec_extract_oddv8hi" - [(set 
(match_operand:V8HI 0 "register_operand" "") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "register_operand" "")] - UNSPEC_EXTODD_V8HI))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_vpkuwum_nomode (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_extract_oddv16qi" - [(set (match_operand:V16QI 0 "register_operand" "") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "")] - UNSPEC_EXTODD_V16QI))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_vpkuhum_nomode (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_interleave_high" - [(set (match_operand:VI 0 "register_operand" "") - (unspec:VI [(match_operand:VI 1 "register_operand" "") - (match_operand:VI 2 "register_operand" "")] - UNSPEC_INTERHI))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vmrgh (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_interleave_low" - [(set (match_operand:VI 0 "register_operand" "") - (unspec:VI [(match_operand:VI 1 "register_operand" "") - (match_operand:VI 2 "register_operand" "")] - UNSPEC_INTERLO))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vmrgl (operands[0], operands[1], operands[2])); - DONE; -}") - (define_expand "vec_unpacks_float_hi_v8hi" [(set (match_operand:V4SF 0 "register_operand" "") (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "")] diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md index f0bf7f9a5e3..9dce18d8149 100644 --- a/gcc/config/rs6000/paired.md +++ b/gcc/config/rs6000/paired.md @@ -272,48 +272,61 @@ (define_insn "paired_merge00" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (vec_concat:V2SF - (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") - (parallel [(const_int 0)])) - (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") - (parallel [(const_int 0)]))))] + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 
"gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 0) (const_int 2)])))] "TARGET_PAIRED_FLOAT" "ps_merge00 %0, %1, %2" [(set_attr "type" "fp")]) (define_insn "paired_merge01" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (vec_concat:V2SF - (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") - (parallel [(const_int 0)])) - (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") - (parallel [(const_int 1)]))))] + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 0) (const_int 3)])))] "TARGET_PAIRED_FLOAT" "ps_merge01 %0, %1, %2" [(set_attr "type" "fp")]) (define_insn "paired_merge10" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (vec_concat:V2SF - (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") - (parallel [(const_int 1)])) - (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") - (parallel [(const_int 0)]))))] + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 1) (const_int 2)])))] "TARGET_PAIRED_FLOAT" "ps_merge10 %0, %1, %2" [(set_attr "type" "fp")]) (define_insn "paired_merge11" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (vec_concat:V2SF - (vec_select:SF (match_operand:V2SF 1 "gpc_reg_operand" "f") - (parallel [(const_int 1)])) - (vec_select:SF (match_operand:V2SF 2 "gpc_reg_operand" "f") - (parallel [(const_int 1)]))))] + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "gpc_reg_operand" "f") + (match_operand:V2SF 2 "gpc_reg_operand" "f")) + (parallel [(const_int 1) (const_int 3)])))] "TARGET_PAIRED_FLOAT" "ps_merge11 %0, %1, %2" [(set_attr "type" "fp")]) +(define_expand "vec_perm_constv2sf" + [(match_operand:V2SF 0 "gpc_reg_operand" "") + (match_operand:V2SF 1 "gpc_reg_operand" "") + (match_operand:V2SF 2 "gpc_reg_operand" "") + 
(match_operand:V2SI 3 "" "")] + "TARGET_PAIRED_FLOAT" +{ + if (rs6000_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + (define_insn "paired_sum0" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") (vec_concat:V2SF (plus:SF (vec_select:SF @@ -439,55 +452,6 @@ DONE; }) -(define_expand "vec_interleave_highv2sf" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")] - UNSPEC_INTERHI_V2SF))] - "TARGET_PAIRED_FLOAT" - " -{ - emit_insn (gen_paired_merge00 (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_interleave_lowv2sf" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")] - UNSPEC_INTERLO_V2SF))] - "TARGET_PAIRED_FLOAT" - " -{ - emit_insn (gen_paired_merge11 (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_extract_evenv2sf" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")] - UNSPEC_EXTEVEN_V2SF))] - "TARGET_PAIRED_FLOAT" - " -{ - emit_insn (gen_paired_merge00 (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_extract_oddv2sf" - [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") - (unspec:V2SF [(match_operand:V2SF 1 "gpc_reg_operand" "f") - (match_operand:V2SF 2 "gpc_reg_operand" "f")] - UNSPEC_EXTODD_V2SF))] - "TARGET_PAIRED_FLOAT" - " -{ - emit_insn (gen_paired_merge11 (operands[0], operands[1], operands[2])); - DONE; -}") - - (define_expand "reduc_splus_v2sf" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") (match_operand:V2SF 1 "gpc_reg_operand" "f"))] @@ -516,12 +480,10 @@ (match_operand:V2SF 1 "gpc_reg_operand" "f") (match_operand:V2SF 2 "gpc_reg_operand" "f")))] "TARGET_PAIRED_FLOAT && flag_unsafe_math_optimizations" - " { - if 
(paired_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -}") - + if (paired_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index e407eda86b3..29dd18d75bd 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -112,6 +112,16 @@ (and (match_code "const_int") (match_test "INTVAL (op) > 0 && exact_log2 (INTVAL (op)) >= 0"))) +;; Match op = 0 or op = 1. +(define_predicate "const_0_to_1_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) + +;; Match op = 2 or op = 3. +(define_predicate "const_2_to_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) + ;; Return 1 if op is a register that is not special. (define_predicate "gpc_reg_operand" (and (match_operand 0 "register_operand") diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 25880ea5691..776350bb5c0 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -984,10 +984,10 @@ BU_VSX_2 (XXMRGHW_4SF, "xxmrghw", CONST, vsx_xxmrghw_v4sf) BU_VSX_2 (XXMRGHW_4SI, "xxmrghw_4si", CONST, vsx_xxmrghw_v4si) BU_VSX_2 (XXMRGLW_4SF, "xxmrglw", CONST, vsx_xxmrglw_v4sf) BU_VSX_2 (XXMRGLW_4SI, "xxmrglw_4si", CONST, vsx_xxmrglw_v4si) -BU_VSX_2 (VEC_MERGEL_V2DF, "mergel_2df", CONST, vec_interleave_lowv2df) -BU_VSX_2 (VEC_MERGEL_V2DI, "mergel_2di", CONST, vec_interleave_lowv2di) -BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, vec_interleave_highv2df) -BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vec_interleave_highv2di) +BU_VSX_2 (VEC_MERGEL_V2DF, "mergel_2df", CONST, vsx_mergel_v2df) +BU_VSX_2 (VEC_MERGEL_V2DI, "mergel_2di", CONST, vsx_mergel_v2di) +BU_VSX_2 (VEC_MERGEH_V2DF, "mergeh_2df", CONST, 
vsx_mergeh_v2df) +BU_VSX_2 (VEC_MERGEH_V2DI, "mergeh_2di", CONST, vsx_mergeh_v2di) /* VSX abs builtin functions. */ BU_VSX_A (XVABSDP, "xvabsdp", CONST, absv2df2) diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 724c947f14a..f72f40dfdb4 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -34,8 +34,10 @@ CC_MODE (CCFP); CC_MODE (CCEQ); /* Vector modes. */ -VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ -VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */ VECTOR_MODE (INT, DI, 1); -VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ -VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ +VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 465015270c6..23327f853fd 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -55,6 +55,10 @@ extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, int); +extern bool altivec_expand_vec_perm_const (rtx op[4]); +extern bool rs6000_expand_vec_perm_const (rtx op[4]); +extern void rs6000_expand_extract_even (rtx, rtx, rtx); +extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); extern void build_mask64_2_operands (rtx, rtx *); extern int expand_block_clear (rtx[]); extern int expand_block_move (rtx[]); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index fb983097659..1608f8cf3f3 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1661,6 +1661,8 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef 
TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok /* Simplifications for entries below. */ @@ -4723,7 +4725,7 @@ rs6000_expand_vector_init (rtx target, rtx vals) copy_to_reg (XVECEXP (vals, 0, 3)))); emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); - emit_insn (gen_vec_extract_evenv4sf (target, flt_even, flt_odd)); + rs6000_expand_extract_even (target, flt_even, flt_odd); } return; } @@ -26206,6 +26208,327 @@ rs6000_emit_parity (rtx dst, rtx src) } } +/* Expand an Altivec constant permutation. Return true if we match + an efficient implementation; false to fall back to VPERM. */ + +bool +altivec_expand_vec_perm_const (rtx operands[4]) +{ + struct altivec_perm_insn { + enum insn_code impl; + unsigned char perm[16]; + }; + static const struct altivec_perm_insn patterns[] = { + { CODE_FOR_altivec_vpkuhum, + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, + { CODE_FOR_altivec_vpkuwum, + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, + { CODE_FOR_altivec_vmrghb, + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, + { CODE_FOR_altivec_vmrghh, + { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, + { CODE_FOR_altivec_vmrghw, + { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, + { CODE_FOR_altivec_vmrglb, + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, + { CODE_FOR_altivec_vmrglh, + { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, + { CODE_FOR_altivec_vmrglw, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } } + }; + + unsigned int i, j, elt, which; + unsigned char perm[16]; + rtx target, op0, op1, sel, x; + bool one_vec; + + target = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + sel = operands[3]; + + /* Unpack the 
constant selector. */ + for (i = which = 0; i < 16; ++i) + { + rtx e = XVECEXP (sel, 0, i); + elt = INTVAL (e) & 31; + which |= (elt < 16 ? 1 : 2); + perm[i] = elt; + } + + /* Simplify the constant selector based on operands. */ + switch (which) + { + default: + gcc_unreachable (); + + case 3: + one_vec = false; + if (!rtx_equal_p (op0, op1)) + break; + /* FALLTHRU */ + + case 2: + for (i = 0; i < 16; ++i) + perm[i] &= 15; + op0 = op1; + one_vec = true; + break; + + case 1: + op1 = op0; + one_vec = true; + break; + } + + /* Look for splat patterns. */ + if (one_vec) + { + elt = perm[0]; + + for (i = 0; i < 16; ++i) + if (perm[i] != elt) + break; + if (i == 16) + { + emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt))); + return true; + } + + if (elt % 2 == 0) + { + for (i = 0; i < 16; i += 2) + if (perm[i] != elt || perm[i + 1] != elt + 1) + break; + if (i == 16) + { + x = gen_reg_rtx (V8HImode); + emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0), + GEN_INT (elt / 2))); + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + + if (elt % 4 == 0) + { + for (i = 0; i < 16; i += 4) + if (perm[i] != elt + || perm[i + 1] != elt + 1 + || perm[i + 2] != elt + 2 + || perm[i + 3] != elt + 3) + break; + if (i == 16) + { + x = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0), + GEN_INT (elt / 4))); + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + } + + /* Look for merge and pack patterns. */ + for (j = 0; j < ARRAY_SIZE (patterns); ++j) + { + bool swapped; + + elt = patterns[j].perm[0]; + if (perm[0] == elt) + swapped = false; + else if (perm[0] == elt + 16) + swapped = true; + else + continue; + for (i = 1; i < 16; ++i) + { + elt = patterns[j].perm[i]; + if (swapped) + elt = (elt >= 16 ? 
elt - 16 : elt + 16); + else if (one_vec && elt >= 16) + elt -= 16; + if (perm[i] != elt) + break; + } + if (i == 16) + { + enum insn_code icode = patterns[j].impl; + enum machine_mode omode = insn_data[icode].operand[0].mode; + enum machine_mode imode = insn_data[icode].operand[1].mode; + + if (swapped) + x = op0, op0 = op1, op1 = x; + if (imode != V16QImode) + { + op0 = gen_lowpart (imode, op0); + op1 = gen_lowpart (imode, op1); + } + if (omode == V16QImode) + x = target; + else + x = gen_reg_rtx (omode); + emit_insn (GEN_FCN (icode) (x, op0, op1)); + if (omode != V16QImode) + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + + return false; +} + +/* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation. + Return true if we match an efficient implementation. */ + +static bool +rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, + unsigned char perm0, unsigned char perm1) +{ + rtx x; + + /* If both selectors come from the same operand, fold to single op. */ + if ((perm0 & 2) == (perm1 & 2)) + { + if (perm0 & 2) + op0 = op1; + else + op1 = op0; + } + /* If both operands are equal, fold to simpler permutation. */ + if (rtx_equal_p (op0, op1)) + { + perm0 = perm0 & 1; + perm1 = (perm1 & 1) + 2; + } + /* If the first selector comes from the second operand, swap. */ + else if (perm0 & 2) + { + if (perm1 & 2) + return false; + perm0 -= 2; + perm1 += 2; + x = op0, op0 = op1, op1 = x; + } + /* If the second selector does not come from the second operand, fail. */ + else if ((perm1 & 2) == 0) + return false; + + /* Success! 
*/ + if (target != NULL) + { + enum machine_mode vmode, dmode; + rtvec v; + + vmode = GET_MODE (target); + gcc_assert (GET_MODE_NUNITS (vmode) == 2); + dmode = mode_for_vector (GET_MODE_INNER (vmode), 4); + + x = gen_rtx_VEC_CONCAT (dmode, op0, op1); + v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1)); + x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v)); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + } + return true; +} + +bool +rs6000_expand_vec_perm_const (rtx operands[4]) +{ + rtx target, op0, op1, sel; + unsigned char perm0, perm1; + + target = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + sel = operands[3]; + + /* Unpack the selector; each index (0..3) addresses OP0 followed by OP1. */ + perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3; + perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3; + + return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1); +} + +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK for constant selector SEL. */ + +static bool +rs6000_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) +{ + /* AltiVec (and thus VSX) can handle arbitrary permutations. */ + if (TARGET_ALTIVEC) + return true; + + /* Check for ps_merge* or evmerge* insns; a NULL target emits nothing. */ + if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode) + || (TARGET_SPE && vmode == V2SImode)) + { + rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); + rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); + return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]); + } + + return false; +} + +/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
*/ + +static void +rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, + enum machine_mode vmode, unsigned nelt, rtx perm[]) +{ + enum machine_mode imode; + rtx x; + + imode = vmode; /* The selector needs an integer vector mode. */ + if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT) + { + imode = GET_MODE_INNER (vmode); + imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0); + imode = mode_for_vector (imode, nelt); + } + + x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm)); + x = expand_vec_perm (vmode, op0, op1, x, target); + if (x != target) + emit_move_insn (target, x); +} + +/* Expand an extract even operation: selector element I is 2 * I. */ + +void +rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) +{ + enum machine_mode vmode = GET_MODE (target); + unsigned i, nelt = GET_MODE_NUNITS (vmode); + rtx perm[16]; + + for (i = 0; i < nelt; i++) + perm[i] = GEN_INT (i * 2); + + rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); +} + +/* Expand a vector interleave operation; HIGHP and endianness pick the half. */ + +void +rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) +{ + enum machine_mode vmode = GET_MODE (target); + unsigned i, high, nelt = GET_MODE_NUNITS (vmode); + rtx perm[16]; + + high = (highp == TARGET_BIG_ENDIAN ? 0 : nelt / 2); + for (i = 0; i < nelt / 2; i++) + { + perm[i * 2] = GEN_INT (i + high); + perm[i * 2 + 1] = GEN_INT (i + nelt + high); + } + + rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); +} + /* Return an RTX representing where to find the function value of a function returning MODE.
*/ static rtx diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md index d50ad1aad1d..bbe11b9124b 100644 --- a/gcc/config/rs6000/spe.md +++ b/gcc/config/rs6000/spe.md @@ -441,12 +441,11 @@ (define_insn "spe_evmergehi" [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") - (vec_merge:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") - (vec_select:V2SI - (match_operand:V2SI 2 "gpc_reg_operand" "r") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 2)))] + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 0) (const_int 2)])))] "TARGET_SPE" "evmergehi %0,%1,%2" [(set_attr "type" "vecsimple") @@ -454,9 +453,11 @@ (define_insn "spe_evmergehilo" [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") - (vec_merge:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") - (match_operand:V2SI 2 "gpc_reg_operand" "r") - (const_int 2)))] + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 0) (const_int 3)])))] "TARGET_SPE" "evmergehilo %0,%1,%2" [(set_attr "type" "vecsimple") @@ -464,12 +465,11 @@ (define_insn "spe_evmergelo" [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") - (vec_merge:V2SI (vec_select:V2SI - (match_operand:V2SI 1 "gpc_reg_operand" "r") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2SI 2 "gpc_reg_operand" "r") - (const_int 2)))] + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 1) (const_int 3)])))] "TARGET_SPE" "evmergelo %0,%1,%2" [(set_attr "type" "vecsimple") @@ -477,20 +477,29 @@ (define_insn "spe_evmergelohi" [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") - (vec_merge:V2SI (vec_select:V2SI - (match_operand:V2SI 1 "gpc_reg_operand" "r") - (parallel [(const_int 1) - (const_int 0)])) - (vec_select:V2SI - 
(match_operand:V2SI 2 "gpc_reg_operand" "r") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 2)))] + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gpc_reg_operand" "r") + (match_operand:V2SI 2 "gpc_reg_operand" "r")) + (parallel [(const_int 1) (const_int 2)])))] "TARGET_SPE" "evmergelohi %0,%1,%2" [(set_attr "type" "vecsimple") (set_attr "length" "4")]) +(define_expand "vec_perm_constv2si" + [(match_operand:V2SI 0 "gpc_reg_operand" "") + (match_operand:V2SI 1 "gpc_reg_operand" "") + (match_operand:V2SI 2 "gpc_reg_operand" "") + (match_operand:V2SI 3 "" "")] + "TARGET_SPE" +{ + if (rs6000_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + (define_insn "spe_evnand" [(set (match_operand:V2SI 0 "gpc_reg_operand" "=r") (not:V2SI (and:V2SI (match_operand:V2SI 1 "gpc_reg_operand" "r") diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 4930f8c3733..bcb23ac5de6 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -747,62 +747,6 @@ INTVAL (operands[2])); DONE; }) - -;; Interleave patterns -(define_expand "vec_interleave_highv4sf" - [(set (match_operand:V4SF 0 "vfloat_operand" "") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" - "") - -(define_expand "vec_interleave_lowv4sf" - [(set (match_operand:V4SF 0 "vfloat_operand" "") - (vec_merge:V4SF - (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] - "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)" - 
"") - -(define_expand "vec_interleave_high" - [(set (match_operand:VEC_64 0 "vfloat_operand" "") - (vec_concat:VEC_64 - (vec_select: (match_operand:VEC_64 1 "vfloat_operand" "") - (parallel [(const_int 0)])) - (vec_select: (match_operand:VEC_64 2 "vfloat_operand" "") - (parallel [(const_int 0)]))))] - "VECTOR_UNIT_VSX_P (mode)" - "") - -(define_expand "vec_interleave_low" - [(set (match_operand:VEC_64 0 "vfloat_operand" "") - (vec_concat:VEC_64 - (vec_select: (match_operand:VEC_64 1 "vfloat_operand" "") - (parallel [(const_int 1)])) - (vec_select: (match_operand:VEC_64 2 "vfloat_operand" "") - (parallel [(const_int 1)]))))] - "VECTOR_UNIT_VSX_P (mode)" - "") - ;; Convert double word types to single word types (define_expand "vec_pack_trunc_v2df" @@ -816,7 +760,7 @@ emit_insn (gen_vsx_xvcvdpsp (r1, operands[1])); emit_insn (gen_vsx_xvcvdpsp (r2, operands[2])); - emit_insn (gen_vec_extract_evenv4sf (operands[0], r1, r2)); + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -831,7 +775,7 @@ emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1])); emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2])); - emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2)); + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -846,7 +790,7 @@ emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1])); emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2])); - emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2)); + rs6000_expand_extract_even (operands[0], r1, r2); DONE; }) @@ -858,7 +802,7 @@ { rtx reg = gen_reg_rtx (V4SFmode); - emit_insn (gen_vec_interleave_highv4sf (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], true); emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); DONE; }) @@ -870,7 +814,7 @@ { rtx reg = gen_reg_rtx (V4SFmode); - emit_insn (gen_vec_interleave_lowv4sf (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], false); emit_insn (gen_vsx_xvcvspdp (operands[0], reg)); DONE; }) @@ 
-882,7 +826,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], true); emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); DONE; }) @@ -894,7 +838,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], false); emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg)); DONE; }) @@ -906,7 +850,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], true); emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg)); DONE; }) @@ -918,7 +862,7 @@ { rtx reg = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1])); + rs6000_expand_interleave (reg, operands[1], operands[1], false); emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg)); DONE; }) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index fb860342304..634bd23f437 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -180,7 +180,13 @@ (V4SI "SI") (V8HI "HI") (V16QI "QI")]) - + +;; Map to a double-sized vector mode +(define_mode_attr VS_double [(V4SI "V8SI") + (V4SF "V8SF") + (V2DI "V4DI") + (V2DF "V4DF")]) + ;; Constants for creating unspecs (define_c_enum "unspec" [UNSPEC_VSX_CONCAT @@ -195,7 +201,6 @@ UNSPEC_VSX_CVSPUXDS UNSPEC_VSX_TDIV UNSPEC_VSX_TSQRT - UNSPEC_VSX_XXPERMDI UNSPEC_VSX_SET UNSPEC_VSX_ROUND_I UNSPEC_VSX_ROUND_IC @@ -352,7 +357,7 @@ (define_insn "*vsx_div3" [(set (match_operand:VSX_B 0 "vsx_register_operand" "=,?wa") (div:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" ",wa") - (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] + (match_operand:VSX_B 2 "vsx_register_operand" ",wa")))] "VECTOR_UNIT_VSX_P (mode)" "xdiv %x0,%x1,%x2" [(set_attr "type" "") @@ -1184,39 +1189,88 @@ [(set_attr 
"length" "4,8") (set_attr "type" "fp")]) -;; General double word oriented permute, allow the other vector types for -;; optimizing the permute instruction. -(define_insn "vsx_xxpermdi_" - [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wd,?wa") - (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wd,wa") - (match_operand:VSX_L 2 "vsx_register_operand" "wd,wa") - (match_operand:QI 3 "u5bit_cint_operand" "i,i")] - UNSPEC_VSX_XXPERMDI))] +;; Expand the builtin form of xxpermdi to canonical rtl. +(define_expand "vsx_xxpermdi_" + [(match_operand:VSX_L 0 "vsx_register_operand" "") + (match_operand:VSX_L 1 "vsx_register_operand" "") + (match_operand:VSX_L 2 "vsx_register_operand" "") + (match_operand:QI 3 "u5bit_cint_operand" "")] "VECTOR_MEM_VSX_P (mode)" - "xxpermdi %x0,%x1,%x2,%3" - [(set_attr "type" "vecperm")]) +{ + rtx target = operands[0]; + rtx op0 = operands[1]; + rtx op1 = operands[2]; + int mask = INTVAL (operands[3]); + rtx perm0 = GEN_INT ((mask >> 1) & 1); /* Element 0 or 1: from op0. */ + rtx perm1 = GEN_INT ((mask & 1) + 2); /* Element 2 or 3: from op1. */ + rtx (*gen) (rtx, rtx, rtx, rtx, rtx); + + if (mode == V2DFmode) + gen = gen_vsx_xxpermdi2_v2df_1; + else + { + gen = gen_vsx_xxpermdi2_v2di_1; + if (mode != V2DImode) /* View each operand as V2DI. */ + { + target = gen_lowpart (V2DImode, target); + op0 = gen_lowpart (V2DImode, op0); + op1 = gen_lowpart (V2DImode, op1); + } + } + emit_insn (gen (target, op0, op1, perm0, perm1)); + DONE; +}) -;; Varient of xxpermdi that is emitted by the vec_interleave functions -(define_insn "*vsx_xxpermdi2_" +(define_insn "vsx_xxpermdi2__1" [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd") - (vec_concat:VSX_D - (vec_select: - (match_operand:VSX_D 1 "vsx_register_operand" "wd") - (parallel - [(match_operand:QI 2 "u5bit_cint_operand" "i")])) - (vec_select: - (match_operand:VSX_D 3 "vsx_register_operand" "wd") - (parallel - [(match_operand:QI 4 "u5bit_cint_operand" "i")]))))] + (vec_select:VSX_D + (vec_concat: + (match_operand:VSX_D 1 "vsx_register_operand" "wd") + (match_operand:VSX_D
2 "vsx_register_operand" "wd")) + (parallel [(match_operand 3 "const_0_to_1_operand" "") + (match_operand 4 "const_2_to_3_operand" "")])))] "VECTOR_MEM_VSX_P (mode)" { - gcc_assert ((UINTVAL (operands[2]) <= 1) && (UINTVAL (operands[4]) <= 1)); - operands[5] = GEN_INT (((INTVAL (operands[2]) & 1) << 1) - | (INTVAL (operands[4]) & 1)); - return \"xxpermdi %x0,%x1,%x3,%5\"; + int mask = (INTVAL (operands[3]) << 1) | (INTVAL (operands[4]) - 2); + operands[3] = GEN_INT (mask); + return "xxpermdi %x0,%x1,%x2,%3"; } [(set_attr "type" "vecperm")]) +(define_expand "vec_perm_const" + [(match_operand:VSX_D 0 "vsx_register_operand" "") + (match_operand:VSX_D 1 "vsx_register_operand" "") + (match_operand:VSX_D 2 "vsx_register_operand" "") + (match_operand:V2DI 3 "" "")] + "VECTOR_MEM_VSX_P (mode)" +{ + if (rs6000_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +;; Expanders for builtins +(define_expand "vsx_mergel_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "") + (vec_select:VSX_D + (vec_concat: + (match_operand:VSX_D 1 "vsx_register_operand" "") + (match_operand:VSX_D 2 "vsx_register_operand" "")) + (parallel [(const_int 1) (const_int 3)])))] + "VECTOR_MEM_VSX_P (mode)" + "") + +(define_expand "vsx_mergeh_" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "") + (vec_select:VSX_D + (vec_concat: + (match_operand:VSX_D 1 "vsx_register_operand" "") + (match_operand:VSX_D 2 "vsx_register_operand" "")) + (parallel [(const_int 0) (const_int 2)])))] + "VECTOR_MEM_VSX_P (mode)" + "") + ;; V2DF/V2DI splat (define_insn "vsx_splat_" [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?wa,?wa,?wa") @@ -1247,40 +1301,24 @@ ;; V4SF/V4SI interleave (define_insn "vsx_xxmrghw_" [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa") - (vec_merge:VSX_W - (vec_select:VSX_W - (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (vec_select:VSX_W - 
(match_operand:VSX_W 2 "vsx_register_operand" "wf,wa") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (const_int 5)))] + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") + (match_operand:VSX_W 2 "vsx_register_operand" "wf,wa")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] "VECTOR_MEM_VSX_P (mode)" "xxmrghw %x0,%x1,%x2" [(set_attr "type" "vecperm")]) (define_insn "vsx_xxmrglw_" [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?wa") - (vec_merge:VSX_W - (vec_select:VSX_W - (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") - (parallel [(const_int 2) - (const_int 0) - (const_int 3) - (const_int 1)])) - (vec_select:VSX_W - (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa") - (parallel [(const_int 0) - (const_int 2) - (const_int 1) - (const_int 3)])) - (const_int 5)))] + (vec_select:VSX_W + (vec_concat: + (match_operand:VSX_W 1 "vsx_register_operand" "wf,wa") + (match_operand:VSX_W 2 "vsx_register_operand" "wf,?wa")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "VECTOR_MEM_VSX_P (mode)" "xxmrglw %x0,%x1,%x2" [(set_attr "type" "vecperm")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 83e73b2a9ba..41e3b40a6db 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2011-12-10 Richard Henderson + + * gcc.target/powerpc/altivec-perm-1.c: New. + * gcc.target/powerpc/altivec-perm-2.c: New. + * gcc.target/powerpc/altivec-perm-4.c: New. + 2011-12-10 Joern Rennecke * gcc.target/epiphany/interrupt.c: Add dg-options "-g". 
diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c new file mode 100644 index 00000000000..ee5c5eee900 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c @@ -0,0 +1,76 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O -maltivec -mno-vsx" } */ + +typedef unsigned char V __attribute__((vector_size(16))); + +V b1(V x) +{ + return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, }); +} + +V b2(V x) +{ + return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, 2,3,2,3, 2,3,2,3, }); +} + +V b4(V x) +{ + return __builtin_shuffle(x, (V){ 4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7, }); +} + +V p2(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + +} + +V p4(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }); +} + +V h1(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); +} + +V h2(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 }); +} + +V h4(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 }); +} + +V l1(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +} + +V l2(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 }); +} + +V l4(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 }); +} + +/* { dg-final { scan-assembler-not "vperm" } } */ +/* { dg-final { scan-assembler "vspltb" } } */ +/* { dg-final { scan-assembler "vsplth" } } */ +/* { dg-final { scan-assembler "vspltw" } } */ +/* { dg-final { scan-assembler "vpkuhum" } } */ +/* { dg-final { 
scan-assembler "vpkuwum" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c new file mode 100644 index 00000000000..1b90bb9567c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O -maltivec -mno-vsx" } */ + +typedef unsigned short V __attribute__((vector_size(16))); + +V f2(V x) +{ + return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, }); +} + +V f4(V x) +{ + return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, }); +} + +/* { dg-final { scan-assembler-not "vperm" } } */ +/* { dg-final { scan-assembler "vsplth" } } */ +/* { dg-final { scan-assembler "vspltw" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c new file mode 100644 index 00000000000..9598edfb010 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O -maltivec -mno-vsx" } */ + +typedef unsigned int V __attribute__((vector_size(16))); + +V f4(V x) +{ + return __builtin_shuffle(x, (V){ 1,1,1,1, }); +} + +/* { dg-final { scan-assembler-not "vperm" } } */ +/* { dg-final { scan-assembler "vspltw" } } */ -- 2.43.5