This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Vectorize conversions directly


On 11/24/2010 06:23 PM, Dmitry Plotnikov wrote:
Hi,

This patch enables vector conversions for the ARM NEON architecture. In its current state the vectorizer can't handle type conversions in the hottest loop of libmp3lame on NEON, since the backend doesn't have appropriate builtins for type conversion. For the x86_64 and rs6000 architectures, which can also vectorize conversions, the default behavior is retained. We have rewritten the condition in vectorizable_conversion() in tree-vect-stmts.c for the case of the NONE modifier. Now it first looks in convert_optab for a suitable operation and then in builtins. It's hard to make such a fix in the ARM backend, because NEON builtins are not saved and enumerated as is done for x86_64 and rs6000. Bootstrapped and regtested on x86_64 without any regressions.

Ok for trunk? 4.7?


Sorry, I forgot to attach the patch.
2010-11-24  Dmitry Plotnikov  <dplotnikov@ispras.ru>

gcc/
	* tree-cfg.c (verify_gimple_assign_unary): Allow vector conversions.
	* tree-vect-stmts.c (supportable_convert_operation): New function.
	  (vectorizable_conversion): Call it.  Change condition and behavior 
	  for NONE modifier case.
	* tree-vectorizer.h (supportable_convert_operation): New prototype.
	* tree.h (VECTOR_INTEGER_TYPE_P): New macro.

gcc/config/arm/
	* neon.md (floatv2siv2sf2): New.
	  (floatunsv2siv2sf2): New.
	  (fix_truncv2sfv2si2): New.
	  (fixuns_truncv2sfv2si2): New.
	  (floatv4siv4sf2): New.
	  (floatunsv4siv4sf2): New.
	  (fix_truncv4sfv4si2): New.
	  (fixuns_truncv4sfv4si2): New.
	
gcc/testsuite/
	* gcc.target/arm/neon/vect-vcvt.c: New test.
	* gcc.target/arm/neon/vect-vcvtq.c: New test.

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 06bbc52..3eeb5a5 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -143,7 +143,9 @@
    (UNSPEC_VZIP2               204)
    (UNSPEC_MISALIGNED_ACCESS   205)
    (UNSPEC_VCLE                        206)
-   (UNSPEC_VCLT                        207)])
+   (UNSPEC_VCLT                        207)
+   (UNSPEC_FIXU                 208)
+   (UNSPEC_FLOATU               209)])
 
 
 ;; Attribute used to permit string comparisons against <VQH_mnem> in
@@ -3053,6 +3055,66 @@
   [(set_attr "neon_type" "neon_bp_simple")]
 )
 
+(define_insn "floatv2siv2sf2"
+  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+       (fix:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.s32\t%P0, %P1"
+)
+
+(define_insn "floatunsv2siv2sf2"
+  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+       (unspec:V2SF [(match_operand:V2SI 1 "s_register_operand" "w")] 
+                    UNSPEC_FLOATU))]
+  "TARGET_NEON"
+  "vcvt.f32.u32\t%P0, %P1"
+)
+
+(define_insn "fix_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+        (fix:V2SI (match_operand:V2SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.s32.f32\t%P0, %P1"
+)
+
+(define_insn "fixuns_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+        (unspec:V2SI [(match_operand:V2SF 1 "s_register_operand" "w")]
+                     UNSPEC_FIXU))]
+  "TARGET_NEON"
+  "vcvt.u32.f32\t%P0, %P1"
+)
+
+(define_insn "floatv4siv4sf2"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+       (fix:V4SF (match_operand:V4SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.s32\t%q0, %q1"
+)
+
+(define_insn "floatunsv4siv4sf2"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+       (unspec:V4SF [(match_operand:V4SI 1 "s_register_operand" "w")]
+                    UNSPEC_FLOATU))]
+  "TARGET_NEON"
+  "vcvt.f32.u32\t%q0, %q1"
+)
+
+(define_insn "fix_truncv4sfv4si2"
+  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
+        (fix:V4SI (match_operand:V4SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.s32.f32\t%q0, %q1"
+)
+
+(define_insn "fixuns_truncv4sfv4si2"
+  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
+        (unspec:V4SI [(match_operand:V4SF 1 "s_register_operand" "w")]
+                     UNSPEC_FIXU))]
+  "TARGET_NEON"
+  "vcvt.u32.f32\t%q0, %q1"
+)
+
 (define_insn "neon_vcvt<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index bffa679..bf151eb 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3278,7 +3278,9 @@ verify_gimple_assign_unary (gimple stmt)
 
     case FLOAT_EXPR:
       {
-       if (!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+       if ((!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+           && (!VECTOR_INTEGER_TYPE_P (rhs1_type)
+               || !VECTOR_FLOAT_TYPE_P(lhs_type)))
          {
            error ("invalid types in conversion to floating point");
            debug_generic_expr (lhs_type);
@@ -3291,7 +3293,9 @@ verify_gimple_assign_unary (gimple stmt)
 
     case FIX_TRUNC_EXPR:
       {
-       if (!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+       if ((!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+           && (!VECTOR_INTEGER_TYPE_P (lhs_type)
+               || !VECTOR_FLOAT_TYPE_P(rhs1_type)))
          {
            error ("invalid types in conversion to integer");
            debug_generic_expr (lhs_type);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 3617ec3..94fbd11 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1640,6 +1640,59 @@ vect_gen_widened_results_half (enum tree_code code,
   return new_stmt;
 }
 
+/* Function supportable_convert_operation
+
+   Check whether an operation represented by the code CODE is a
+   convert operation that is supported by the target platform in
+   vector form (i.e., when operating on arguments of type VECTYPE_IN
+   producing a result of type VECTYPE_OUT).
+   
+   Convert operations we currently support directly are FIX_TRUNC and FLOAT.
+   This function checks if these operations are supported
+   by the target platform either directly (via vector tree-codes), or via
+   target builtins.
+   
+   Output:
+   - CODE1 is code of vector operation to be used when
+   vectorizing the operation, if available.
+   - DECL is decl of target builtin functions to be used
+   when vectorizing the operation, if available.  In this case,
+   CODE1 is CALL_EXPR.  */
+
+bool
+supportable_convert_operation (enum tree_code code,
+                                   tree vectype_out, tree vectype_in,
+                                   tree *decl, enum tree_code *code1)
+{
+  enum machine_mode m1,m2;
+  convert_optab optab1 = NULL;
+
+  /* First check if we can do the conversion directly.  */
+  if (code == FIX_TRUNC_EXPR)
+    optab1 = (TYPE_UNSIGNED (vectype_out)) ? ufixtrunc_optab : sfixtrunc_optab;
+  else if (code == FLOAT_EXPR)
+    optab1 = (TYPE_UNSIGNED (vectype_in)) ? ufloat_optab : sfloat_optab;
+  
+  m1 = TYPE_MODE (vectype_in);
+  m2 = TYPE_MODE (vectype_out);
+
+  if (convert_optab_handler (optab1, m2, m1) != CODE_FOR_nothing)
+    {
+      *code1 = code;
+      return true;
+    }
+  
+  /* Now check for builtin.  */
+  if (targetm.vectorize.builtin_conversion
+      && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+    {
+      *code1 = CALL_EXPR;
+      *decl = targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in);
+      return true;
+    }
+  return false;
+}
+
 
 /* Check if STMT performs a conversion operation, that can be vectorized.
    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
@@ -1669,7 +1722,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   tree vectype_out, vectype_in;
   int ncopies, j;
   tree rhs_type;
-  tree builtin_decl;
   enum { NARROW, NONE, WIDEN } modifier;
   int i;
   VEC(tree,heap) *vec_oprnds0 = NULL;
@@ -1758,7 +1810,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 
   /* Supportable by target?  */
   if ((modifier == NONE
-       && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+       && !supportable_convert_operation (code, vectype_out, vectype_in, &decl1, &code1))
       || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt,
                                              vectype_out, vectype_in,
@@ -1808,19 +1860,28 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
 
-         builtin_decl =
-           targetm.vectorize.builtin_conversion (code,
-                                                 vectype_out, vectype_in);
-         FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
-           {
-             /* Arguments are ready. create the new vector stmt.  */
-             new_stmt = gimple_build_call (builtin_decl, 1, vop0);
-             new_temp = make_ssa_name (vec_dest, new_stmt);
-             gimple_call_set_lhs (new_stmt, new_temp);
-             vect_finish_stmt_generation (stmt, new_stmt, gsi);
-             if (slp_node)
-               VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
-           }
+         FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
+         {
+           /* Arguments are ready, create the new vector stmt.  */
+            if (code1 == CALL_EXPR)
+             {
+               new_stmt = gimple_build_call (decl1, 1, vop0);
+               new_temp = make_ssa_name (vec_dest, new_stmt);
+               gimple_call_set_lhs (new_stmt, new_temp);
+             }
+           else
+              {
+                gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
+                new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0,
+                                                        NULL);
+                new_temp = make_ssa_name (vec_dest, new_stmt);
+                gimple_assign_set_lhs (new_stmt, new_temp);
+             }
+
+           vect_finish_stmt_generation (stmt, new_stmt, gsi);
+            if (slp_node)
+              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+         }
 
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index f2a5889..c016963 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -815,6 +815,9 @@ extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
                                  bool *, slp_tree, slp_instance);
 extern void vect_remove_stores (gimple);
 extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
+extern bool supportable_convert_operation (enum tree_code, tree, tree,
+                                          tree *, enum tree_code *);
+
 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
                                     tree, int);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
diff --git a/gcc/tree.h b/gcc/tree.h
index 3877ae5..e4b4501 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1047,6 +1047,13 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
   (TREE_CODE (TYPE) == COMPLEX_TYPE    \
    && TREE_CODE (TREE_TYPE (TYPE)) == REAL_TYPE)
 
+/* Nonzero if TYPE represents a vector integer type.  */
+                
+#define VECTOR_INTEGER_TYPE_P(TYPE)                   \
+             (TREE_CODE (TYPE) == VECTOR_TYPE      \
+                 && TREE_CODE (TREE_TYPE (TYPE)) == INTEGER_TYPE)
+
+
 /* Nonzero if TYPE represents a vector floating-point type.  */
 
 #define VECTOR_FLOAT_TYPE_P(TYPE)      \
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
new file mode 100644
index 0000000..f33206c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -fdump-tree-vect-details" } */
+
+#include <stdarg.h>
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+  int i;
+
+  /* int -> float */
+  for (i = 0; i < N; i++)
+    fa[i] = (float) ib[i];
+
+  /* float -> int */
+  for (i = 0; i < N; i++)
+    ia[i] = (int) fa[i];
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
new file mode 100644
index 0000000..3412cf2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -fdump-tree-vect-details -mvectorize-with-neon-quad" } */
+
+#include <stdarg.h>
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+  int i;
+
+  /* int -> float */
+  for (i = 0; i < N; i++)
+    fa[i] = (float) ib[i];
+
+  /* float -> int */
+  for (i = 0; i < N; i++)
+    ia[i] = (int) fa[i];
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]