This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch, spu] Implementation of vector unpack for Cell SPU


Hi,

This patch implements vector unpack operations (short to int and char to
short for both signed and unsigned types) for Cell SPU.

Tested on SPU. OK for 4.4?

Thanks,
Ira

ChangeLog:

      * config/spu/spu.md (vec_unpacku_hi_v8hi, vec_unpacku_lo_v8hi,
      vec_extendhisi2, vec_unpacks_hi_v8hi, vec_unpacks_lo_v8hi,
      vec_unpacku_hi_v16qi, vec_unpacku_lo_v16qi, vec_extendqihi2,
      vec_unpacks_hi_v16qi, vec_unpacks_lo_v16qi): Implement.

testsuite/ChangeLog:

      * lib/target-supports.exp (check_effective_target_vect_unpack):
      Return true for SPU.
      (check_effective_target_vect_short_mult): Likewise.
      * gcc.dg/vect/vect-reduc-dot-s16b.c: Expect vectorization of
      the loop on targets that support vect_unpack.

Index: config/spu/spu.md
===================================================================
--- config/spu/spu.md   (revision 131146)
+++ config/spu/spu.md   (working copy)
@@ -153,6 +153,14 @@
  (UNSPEC_SPU_REALIGN_LOAD 49)
  (UNSPEC_SPU_MASK_FOR_LOAD 50)
  (UNSPEC_DFTSV          51)
+ (UNSPEC_VUPKHUH         52)
+ (UNSPEC_VUPKLUH         53)
+ (UNSPEC_VUPKHSH         54)
+ (UNSPEC_VUPKLSH         55)
+ (UNSPEC_VUPKHUB         56)
+ (UNSPEC_VUPKLUB         57)
+ (UNSPEC_VUPKHSB         58)
+ (UNSPEC_VUPKLSB         59)
 ])

 (include "predicates.md")
@@ -4403,3 +4411,159 @@ selb\t%0,%4,%0,%3"

   DONE;
 }")
+
+;; Unsigned unpack of a V8HI operand into a V4SI result ("hi" variant).
+;; Expands to one shufb that uses operand 1 as both shuffle sources.
+(define_expand "vec_unpacku_hi_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHUH))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  /* Shuffle control: every 4-byte group is 0x80, 0x80 followed by the two
+     byte indices of one source halfword (source bytes 0x00-0x07 overall).
+     NOTE(review): 0x80 is expected to produce a zero byte per the SPU
+     shufb definition, which gives the zero extension -- confirm in the ISA.  */
+  unsigned char arr[16] = {
+    0x80, 0x80, 0x00, 0x01, 0x80, 0x80, 0x02, 0x03,
+    0x80, 0x80, 0x04, 0x05, 0x80, 0x80, 0x06, 0x07};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+;; Unsigned unpack of a V8HI operand into a V4SI result ("lo" variant).
+;; Expands to one shufb that uses operand 1 as both shuffle sources.
+(define_expand "vec_unpacku_lo_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLUH))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  /* Shuffle control: every 4-byte group is 0x80, 0x80 followed by the two
+     byte indices of one source halfword (source bytes 0x08-0x0F overall).
+     NOTE(review): 0x80 is expected to produce a zero byte per the SPU
+     shufb definition, which gives the zero extension -- confirm in the ISA.  */
+  unsigned char arr[16] = {
+    0x80, 0x80, 0x08, 0x09, 0x80, 0x80, 0x0A, 0x0B,
+    0x80, 0x80, 0x0C, 0x0D, 0x80, 0x80, 0x0E, 0x0F};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+;; Emits a single xshw from operand 1 into operand 0.  The
+;; vec_unpacks_{hi,lo}_v8hi expanders call this on the result of the
+;; corresponding unsigned unpack.
+;; NOTE(review): the sign_extend source is declared V4SI, the same mode as
+;; the destination -- confirm this is the intended source mode.
+(define_insn "vec_extendhisi2"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (sign_extend:V4SI (match_operand:V4SI 1 "register_operand" "r")))]
+  ""
+  "xshw\t%0,%1")
+
+;; Signed unpack of a V8HI operand into a V4SI result ("hi" variant).
+;; Built from the unsigned unpack followed by vec_extendhisi2 (xshw).
+(define_expand "vec_unpacks_hi_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHSH))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V4SImode);
+  rtx tmp2 = gen_reg_rtx (V4SImode);
+
+  /* tmp1 = unsigned unpack of operand 1; tmp2 = xshw of tmp1; then copy
+     tmp2 into the destination.  */
+  emit_insn (gen_vec_unpacku_hi_v8hi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendhisi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+
+;; Signed unpack of a V8HI operand into a V4SI result ("lo" variant).
+;; Built from the unsigned unpack followed by vec_extendhisi2 (xshw).
+(define_expand "vec_unpacks_lo_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=r")
+        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLSH))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V4SImode);
+  rtx tmp2 = gen_reg_rtx (V4SImode);
+
+  /* tmp1 = unsigned unpack of operand 1; tmp2 = xshw of tmp1; then copy
+     tmp2 into the destination.  */
+  emit_insn (gen_vec_unpacku_lo_v8hi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendhisi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+
+;; Unsigned unpack of a V16QI operand into a V8HI result ("hi" variant).
+;; Expands to one shufb that uses operand 1 as both shuffle sources.
+(define_expand "vec_unpacku_hi_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHUB))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  /* Shuffle control: every 2-byte group is 0x80 followed by the index of
+     one source byte (source bytes 0x00-0x07 overall).
+     NOTE(review): 0x80 is expected to produce a zero byte per the SPU
+     shufb definition, which gives the zero extension -- confirm in the ISA.  */
+  unsigned char arr[16] = {
+    0x80, 0x00, 0x80, 0x01, 0x80, 0x02, 0x80, 0x03,
+    0x80, 0x04, 0x80, 0x05, 0x80, 0x06, 0x80, 0x07};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+;; Unsigned unpack of a V16QI operand into a V8HI result ("lo" variant).
+;; Expands to one shufb that uses operand 1 as both shuffle sources.
+(define_expand "vec_unpacku_lo_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLUB))]
+  ""
+  "
+{
+  rtx mask = gen_reg_rtx (TImode);
+  /* Shuffle control: every 2-byte group is 0x80 followed by the index of
+     one source byte (source bytes 0x08-0x0F overall).
+     NOTE(review): 0x80 is expected to produce a zero byte per the SPU
+     shufb definition, which gives the zero extension -- confirm in the ISA.  */
+  unsigned char arr[16] = {
+    0x80, 0x08, 0x80, 0x09, 0x80, 0x0A, 0x80, 0x0B,
+    0x80, 0x0C, 0x80, 0x0D, 0x80, 0x0E, 0x80, 0x0F};
+
+  emit_move_insn (mask, array_to_constant (TImode, arr));
+  emit_insn (gen_shufb (operands[0], operands[1], operands[1], mask));
+
+  DONE;
+}")
+
+;; Emits a single xsbh from operand 1 into operand 0.  The
+;; vec_unpacks_{hi,lo}_v16qi expanders call this on the result of the
+;; corresponding unsigned unpack.
+;; NOTE(review): the sign_extend source is declared V8HI, the same mode as
+;; the destination -- confirm this is the intended source mode.
+(define_insn "vec_extendqihi2"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (sign_extend:V8HI (match_operand:V8HI 1 "register_operand" "r")))]
+  ""
+  "xsbh\t%0,%1")
+
+;; Signed unpack of a V16QI operand into a V8HI result ("hi" variant).
+;; Built from the unsigned unpack followed by vec_extendqihi2 (xsbh).
+(define_expand "vec_unpacks_hi_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKHSB))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V8HImode);
+  rtx tmp2 = gen_reg_rtx (V8HImode);
+
+  /* tmp1 = unsigned unpack of operand 1; tmp2 = xsbh of tmp1; then copy
+     tmp2 into the destination.  */
+  emit_insn (gen_vec_unpacku_hi_v16qi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendqihi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+
+;; Signed unpack of a V16QI operand into a V8HI result ("lo" variant).
+;; Built from the unsigned unpack followed by vec_extendqihi2 (xsbh).
+(define_expand "vec_unpacks_lo_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=r")
+        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "r")]
+                     UNSPEC_VUPKLSB))]
+  ""
+  "
+{
+  rtx tmp1 = gen_reg_rtx (V8HImode);
+  rtx tmp2 = gen_reg_rtx (V8HImode);
+
+  /* tmp1 = unsigned unpack of operand 1; tmp2 = xsbh of tmp1; then copy
+     tmp2 into the destination.  */
+  emit_insn (gen_vec_unpacku_lo_v16qi (tmp1, operands[1]));
+  emit_insn (gen_vec_extendqihi2 (tmp2, tmp1));
+  emit_move_insn (operands[0], tmp2);
+
+  DONE;
+}")
+

Index: testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c
===================================================================
--- testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c (revision 131005)
+++ testsuite/gcc.dg/vect/vect-reduc-dot-s16b.c (working copy)
@@ -48,9 +48,9 @@ main (void)
   return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1
"vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si ||
vect_unpack } } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { ! vect_short_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { ! vect_widen_sum_hi_to_si } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0
"vect" { target { { ! vect_widen_sum_hi_to_si } && { !
vect_unpack } } } } } */

 /* { dg-final { cleanup-tree-dump "vect" } } */

Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp   (revision 131005)
+++ testsuite/lib/target-supports.exp   (working copy)
@@ -1632,7 +1632,8 @@ proc check_effective_target_vect_unpack
         set et_vect_unpack_saved 0
         if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
              || [istarget i?86-*-*]
-             || [istarget x86_64-*-*] } {
+             || [istarget x86_64-*-*]
+             || [istarget spu-*-*] } {
             set et_vect_unpack_saved 1
         }
     }
@@ -1841,6 +1842,7 @@ proc check_effective_target_vect_short_m
     } else {
        set et_vect_short_mult_saved 0
        if { [istarget ia64-*-*]
+            || [istarget spu-*-*]
             || [istarget i?86-*-*]
             || [istarget x86_64-*-*] } {
           set et_vect_short_mult_saved 1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]