[patch, spu] Implementation of vector unpack for Cell SPU

Ira Rosen IRAR@il.ibm.com
Mon Dec 24 12:44:00 GMT 2007


Hi,

This patch implements vector unpack operations (short to int and char to
short for both signed and unsigned types) for Cell SPU.

Tested on SPU. OK for 4.4?

Thanks,
Ira

ChangeLog:

      * config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi,
      vec_extendhisi2, vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi,
      vec_unpacku_hi_v16qi, vec_unpacku_lo_v16qi, vec_extendqihi2,
      vec_unpacks_hi_v16qi, vec_unpacks_lo_v16qi): Implement.

testsuite/ChangeLog:

      * lib/target-supports.exp (check_effective_target_vect_unpack):
      Return true for SPU.
      (check_effective_target_vect_short_mult): Likewise.
      * gcc.dg/vect/vect-reduc-dot-s16b.c: Expect vectorization of
      the loop on targets that support vect_unpack.

Index: config/spu/spu.md
===================================================================
--- config/spu/spu.md   (revision 131146)
+++ config/spu/spu.md   (working copy)
@@ -153,6 +153,14 @@
  (UNSPEC_SPU_REALIGN_LOAD 49)
  (UNSPEC_SPU_MASK_FOR_LOAD 50)
  (UNSPEC_DFTSV          51)
+ (UNSPEC_VUPKHUH         52)
+ (UNSPEC_VUPKLUH         53)
+ (UNSPEC_VUPKHSH         54)
+ (UNSPEC_VUPKLSH         55)
+ (UNSPEC_VUPKHUB         56)
+ (UNSPEC_VUPKLUB         57)
+ (UNSPEC_VUPKHSB         58)
+ (UNSPEC_VUPKLSB         59)
 ])

 (include "predicates.md")
@@ -4403,3 +4411,159 @@ selb\t%0,%4,%0,%3"
 
   DONE;
 }")
+
+(define_expand "vec_unpacku_hi_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHUH))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  unsigned char arr[16] = {
+    0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+    0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+(define_expand "vec_unpacku_lo_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLUH))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  unsigned char arr[16] = {
+    0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+    0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+(define_insn "vec_extendhisi2"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (sign_extend:V4SI (match_operand:V4SI 1 "register_operand" "r")))]
+  ""
+  "xshw\t%0,%1")	; xshw sign-extends the rightmost halfword of each word
+
+(define_expand "vec_unpacks_hi_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHSH))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V4SImode);
+  rtx tmp2 = gen_reg_rtx (V4SImode);
+
+  emit_insn (gen_vec_unpacku_hi_v8hi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendhisi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+
+(define_expand "vec_unpacks_lo_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLSH))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V4SImode);
+  rtx tmp2 = gen_reg_rtx (V4SImode);
+
+  emit_insn (gen_vec_unpacku_lo_v8hi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendhisi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+
+(define_expand "vec_unpacku_hi_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHUB))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  unsigned char arr[16] = {
+    0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+    0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+(define_expand "vec_unpacku_lo_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLUB))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  unsigned char arr[16] = {
+    0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+    0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+(define_insn "vec_extendqihi2"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (sign_extend:V8HI (match_operand:V8HI 1 "register_operand" "r")))]
+  ""
+  "xsbh\t%0,%1")	; xsbh sign-extends the rightmost byte of each halfword
+
+(define_expand "vec_unpacks_hi_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHSB))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V8HImode);
+  rtx tmp2 = gen_reg_rtx (V8HImode);
+
+  emit_insn (gen_vec_unpacku_hi_v16qi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendqihi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+
+(define_expand "vec_unpacks_lo_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLSB))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V8HImode);
+  rtx tmp2 = gen_reg_rtx (V8HImode);
+
+  emit_insn (gen_vec_unpacku_lo_v16qi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendqihi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+

Index: testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c
===================================================================
--- testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c (revision 131005)
+++ testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c (working copy)
@@ -48,9 +48,9 @@ main (void)
   return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si ||
vect_unpack } } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { ! vect_short_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { ! vect_widen_sum_hi_to_si } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { { ! vect_widen_sum_hi_to_si } && { !
vect_unpack } } } } } */

 /* { dg-final { cleanup-tree-dump "vect" } } */

Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp   (revision 131005)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -1632,7 +1632,8 @@ proc check_effective_target_vect_unpack
         set et_vect_unpack_saved 0
         if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
              || [istarget i?86-*-*]
-             || [istarget x86_64-*-*] } {
+             || [istarget x86_64-*-*]
+             || [istarget spu-*-*] } {
             set et_vect_unpack_saved 1
         }
     }
@@ -1841,6 +1842,7 @@ proc check_effective_target_vect_short_m
     } else {
        set et_vect_short_mult_saved 0
        if { [istarget ia64-*-*]
+            || [istarget spu-*-*]
             || [istarget i?86-*-*]
             || [istarget x86_64-*-*] } {
           set et_vect_short_mult_saved 1



More information about the Gcc-patches mailing list