This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch, spu] Implementation of vector unpack for Cell SPU
- From: Ira Rosen <IRAR at il dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 24 Dec 2007 10:39:57 +0200
- Subject: [patch, spu] Implementation of vector unpack for Cell SPU
Hi,
This patch implements vector unpack operations (short to int and char to
short for both signed and unsigned types) for Cell SPU.
Tested on SPU. OK for 4.4?
Thanks,
Ira
ChangeLog:
* config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi,
vec_extendhisi2, vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi,
vec_unpacku_hi_v16qi, vec_unpacku_lo_v16qi, vec_extendqihi2,
vec_unpacks_hi_v16qi, vec_unpacks_lo_v16qi): Implement.
testsuite/ChangeLog:
* lib/target-supports.exp (check_effective_target_vect_unpack):
Return true for SPU.
(check_effective_target_vect_short_mult): Likewise.
* gcc.dg/vect/vect-reduc-dot-s16b.c: Expect vectorization of
the loop on targets that support vect_unpack.
Index: config/spu/spu.md
===================================================================
--- config/spu/spu.md (revision 131146)
+++ config/spu/spu.md (working copy)
@@ -153,6 +153,14 @@
(UNSPEC_SPU_REALIGN_LOAD 49)
(UNSPEC_SPU_MASK_FOR_LOAD 50)
(UNSPEC_DFTSV 51)
+ (UNSPEC_VUPKHUH 52)
+ (UNSPEC_VUPKLUH 53)
+ (UNSPEC_VUPKHSH 54)
+ (UNSPEC_VUPKLSH 55)
+ (UNSPEC_VUPKHUB 56)
+ (UNSPEC_VUPKLUB 57)
+ (UNSPEC_VUPKHSB 58)
+ (UNSPEC_VUPKLSB 59)
])
(include "predicates.md")
@@ -4403,3 +4411,159 @@ selb\t%0,%4,%0,%3"
DONE;
}")
+
+(define_expand "vec_unpacku_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+ UNSPEC_VUPKHUH))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+ 0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+ UNSPEC_VUPKLUH))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+ 0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+}")
+
+(define_insn "vec_extendhisi2"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (sign_extend:V4SI (match_operand:V4SI 1 "register_operand" "r")))]
+ ""
+ "xshw\t%0,%1")
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+ UNSPEC_VUPKHSH))]
+ ""
+ "
+{
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_hi_v8hi (tmp1, operands[1]));
+ emit_insn (gen_vec_extendhisi2 (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+}")
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=r")
+ (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+ UNSPEC_VUPKLSH))]
+ ""
+ "
+{
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_lo_v8hi (tmp1, operands[1]));
+ emit_insn (gen_vec_extendhisi2 (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+}")
+
+(define_expand "vec_unpacku_hi_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=r")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+ UNSPEC_VUPKHUB))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+ 0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+}")
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=r")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+ UNSPEC_VUPKLUB))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (TImode);
+ unsigned char arr[16] = {
+ 0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+ 0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+
+ emit_move_insn (mask, array_to_constant (TImode, arr));
+ emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+ DONE;
+}")
+
+(define_insn "vec_extendqihi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=r")
+ (sign_extend:V8HI (match_operand:V8HI 1 "register_operand" "r")))]
+ ""
+ "xsbh\t%0,%1")
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=r")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+ UNSPEC_VUPKHSB))]
+ ""
+ "
+{
+ rtx tmp1 = gen_reg_rtx (V8HImode);
+ rtx tmp2 = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_vec_unpacku_hi_v16qi (tmp1, operands[1]));
+ emit_insn (gen_vec_extendqihi2 (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+}")
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(set (match_operand:V8HI 0 "register_operand" "=r")
+ (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+ UNSPEC_VUPKLSB))]
+ ""
+ "
+{
+ rtx tmp1 = gen_reg_rtx (V8HImode);
+ rtx tmp2 = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_vec_unpacku_lo_v16qi (tmp1, operands[1]));
+ emit_insn (gen_vec_extendqihi2 (tmp2, tmp1));
+ emit_move_insn (operands[0], tmp2);
+
+ DONE;
+}")
+
Index: testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c
===================================================================
--- testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c (revision 131005)
+++ testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c (working copy)
@@ -48,9 +48,9 @@ main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si ||
vect_unpack } } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { ! vect_short_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { ! vect_widen_sum_hi_to_si } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { { ! vect_widen_sum_hi_to_si } && { !
vect_unpack } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp (revision 131005)
+++ testsuite/lib/target-supports.exp (working copy)
@@ -1632,7 +1632,8 @@ proc check_effective_target_vect_unpack
set et_vect_unpack_saved 0
if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
|| [istarget i?86-*-*]
- || [istarget x86_64-*-*] } {
+ || [istarget x86_64-*-*]
+ || [istarget spu-*-*] } {
set et_vect_unpack_saved 1
}
}
@@ -1841,6 +1842,7 @@ proc check_effective_target_vect_short_m
} else {
set et_vect_short_mult_saved 0
if { [istarget ia64-*-*]
+ || [istarget spu-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_short_mult_saved 1