This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][PING] Vectorize conversions directly


Original discussion here: http://gcc.gnu.org/ml/gcc-patches/2010-12/msg00751.html

This patch enables vector conversions for ARM NEON architecture. In its current state vectorizer can't handle type conversions in the hottest loop of libmp3lame on NEON since its backend doesn't have appropriate builtins for type conversion. For x86_64 and rs6000 architectures that also can vectorize conversions the default behavior is retained. We have rewritten condition in vectorizable_conversion() in tree-vect-stmts.c for the case of NONE modifier. Now It first looks in convert_optab for suitable operation and then in builtins.

Regtested with arm-qemu ok.
Initially few tests failed (gcc.dg/vect/slp-10.c, gcc.dg/vect/slp-11c.c, gcc.dg/vect/slp-33.c, gcc.dg/vect/fast-math-pr35982.c) because now it vectorizes more loops than they expected to. We adjusted target-supports.exp so vectorizable conversions and even/odd extractions are now supported for NEON.
Ok for trunk?


2011-10-20 Dmitry Plotnikov <dplotnikov@ispras.ru>

gcc/
    * tree-cfg.c (verify_gimple_assign_unary): Allow vector conversions.
    * tree-vect-stmts.c (supportable_convert_operation): New function.
      (vectorizable_conversion): Call it.  Change condition and behavior
      for NONE modifier case.
    * tree-vectorizer.h (supportable_convert_operation): New prototype.
    * tree.h (VECTOR_INTEGER_TYPE_P): New macro.

gcc/config/arm/
    * neon.md (floatv2siv2sf2): New.
      (floatunsv2siv2sf2): New.
      (fix_truncv2sfv2si2): New.
      (fix_truncunsv2sfv2si2): New.
      (floatv4siv4sf2): New.
      (floatunsv4siv4sf2): New.
      (fix_truncv4sfv4si2): New.
      (fix_truncunsv4sfv4si2): New.

gcc/testsuite/
    * gcc.target/arm/vect-vcvt.c: New test.
    * gcc.target/arm/vect-vcvtq.c: New test.

gcc/testsuite/lib/
    * target-supports.exp (check_effective_target_vect_intfloat_cvt): True
      for ARM NEON.
      (check_effective_target_vect_uintfloat_cvt): Likewise.
      (check_effective_target_vect_intfloat_cvt): Likewise.
      (check_effective_target_vect_floatuint_cvt): Likewise.
      (check_effective_target_vect_floatint_cvt): Likewise.
      (check_effective_target_vect_extract_even_odd): Likewise.


diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index ea09da2..0dd13a6 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2945,6 +2945,62 @@
                    (const_string "neon_fp_vadd_qqq_vabs_qq")))]
 )
 
+(define_insn "floatv2siv2sf2"
+  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+       (float:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.s32\t%P0, %P1"
+)
+
+(define_insn "floatunsv2siv2sf2"
+  [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+       (unsigned_float:V2SF (match_operand:V2SI 1 "s_register_operand" "w")))] 
+  "TARGET_NEON"
+  "vcvt.f32.u32\t%P0, %P1"
+)
+
+(define_insn "fix_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+        (fix:V2SI (match_operand:V2SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.s32.f32\t%P0, %P1"
+)
+
+(define_insn "fixuns_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+        (unsigned_fix:V2SI (match_operand:V2SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.u32.f32\t%P0, %P1"
+)
+
+(define_insn "floatv4siv4sf2"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+       (float:V4SF (match_operand:V4SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.s32\t%q0, %q1"
+)
+
+(define_insn "floatunsv4siv4sf2"
+  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
+       (unsigned_float:V4SF (match_operand:V4SI 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.f32.u32\t%q0, %q1"
+)
+
+(define_insn "fix_truncv4sfv4si2"
+  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
+        (fix:V4SI (match_operand:V4SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.s32.f32\t%q0, %q1"
+)
+
+(define_insn "fixuns_truncv4sfv4si2"
+  [(set (match_operand:V4SI 0 "s_register_operand" "=w")
+        (unsigned_fix:V4SI (match_operand:V4SF 1 "s_register_operand" "w")))]
+  "TARGET_NEON"
+  "vcvt.u32.f32\t%q0, %q1"
+)
+
 (define_insn "neon_vcvt<mode>"
   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
 	(unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index a3b5311..c785b0c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1806,7 +1806,9 @@ proc check_effective_target_vect_intfloat_cvt { } {
         if { [istarget i?86-*-*]
               || ([istarget powerpc*-*-*]
                    && ![istarget powerpc-*-linux*paired*])
-              || [istarget x86_64-*-*] } {
+              || [istarget x86_64-*-*] 
+              || ([istarget arm*-*-*]
+                  && [check_effective_target_arm_neon_ok])} {
            set et_vect_intfloat_cvt_saved 1
         }
     }
@@ -1842,7 +1844,9 @@ proc check_effective_target_vect_uintfloat_cvt { } {
         if { [istarget i?86-*-*]
 	      || ([istarget powerpc*-*-*]
 		  && ![istarget powerpc-*-linux*paired*])
-	      || [istarget x86_64-*-*] } {
+	      || [istarget x86_64-*-*] 
+              || ([istarget arm*-*-*]
+                  && [check_effective_target_arm_neon_ok])} {
            set et_vect_uintfloat_cvt_saved 1
         }
     }
@@ -1865,7 +1869,9 @@ proc check_effective_target_vect_floatint_cvt { } {
         if { [istarget i?86-*-*]
               || ([istarget powerpc*-*-*]
                    && ![istarget powerpc-*-linux*paired*])
-              || [istarget x86_64-*-*] } {
+              || [istarget x86_64-*-*]
+              || ([istarget arm*-*-*]
+                  && [check_effective_target_arm_neon_ok])} {
            set et_vect_floatint_cvt_saved 1
         }
     }
@@ -1885,7 +1891,9 @@ proc check_effective_target_vect_floatuint_cvt { } {
     } else {
         set et_vect_floatuint_cvt_saved 0
         if { ([istarget powerpc*-*-*]
-	      && ![istarget powerpc-*-linux*paired*]) } {
+	      && ![istarget powerpc-*-linux*paired*]) 
+             || ([istarget arm*-*-*]
+                 && [check_effective_target_arm_neon_ok])} {
            set et_vect_floatuint_cvt_saved 1
         }
     }
@@ -3335,7 +3343,9 @@ proc check_effective_target_vect_extract_even_odd { } {
              || [istarget i?86-*-*]
              || [istarget x86_64-*-*]
              || [istarget ia64-*-*]
-             || [istarget spu-*-*] } {
+             || [istarget spu-*-*] 
+             || ([istarget arm*-*-*]
+                  && [check_effective_target_arm_neon_ok])} {
            set et_vect_extract_even_odd_saved 1
         }
     }
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index bcf71b9..1f3f10a 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3342,7 +3342,9 @@ verify_gimple_assign_unary (gimple stmt)
 
     case FLOAT_EXPR:
       {
-	if (!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+	if ((!INTEGRAL_TYPE_P (rhs1_type) || !SCALAR_FLOAT_TYPE_P (lhs_type))
+	    && (!VECTOR_INTEGER_TYPE_P (rhs1_type)
+	        || !VECTOR_FLOAT_TYPE_P(lhs_type)))
 	  {
 	    error ("invalid types in conversion to floating point");
 	    debug_generic_expr (lhs_type);
@@ -3355,7 +3357,9 @@ verify_gimple_assign_unary (gimple stmt)
 
     case FIX_TRUNC_EXPR:
       {
-	if (!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+        if ((!INTEGRAL_TYPE_P (lhs_type) || !SCALAR_FLOAT_TYPE_P (rhs1_type))
+            && (!VECTOR_INTEGER_TYPE_P (lhs_type)
+                || !VECTOR_FLOAT_TYPE_P(rhs1_type)))
 	  {
 	    error ("invalid types in conversion to integer");
 	    debug_generic_expr (lhs_type);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index f2ac8c7..ecf6a91 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1821,6 +1821,59 @@ vect_gen_widened_results_half (enum tree_code code,
   return new_stmt;
 }
 
+/* Function supportable_convert_operation
+
+   Check whether an operation represented by the code CODE is a
+   convert operation that is supported by the target platform in
+   vector form (i.e., when operating on arguments of type VECTYPE_IN
+   producing a result of type VECTYPE_OUT).
+   
+   Convert operations we currently support directly are FIX_TRUNC and FLOAT.
+   This function checks if these operations are supported
+   by the target platform either directly (via vector tree-codes), or via
+   target builtins.
+   
+   Output:
+   - CODE1 is code of vector operation to be used when
+   vectorizing the operation, if available.
+   - DECL is decl of target builtin functions to be used
+   when vectorizing the operation, if available.  In this case,
+   CODE1 is CALL_EXPR.  */
+
+bool
+supportable_convert_operation (enum tree_code code,
+                                   tree vectype_out, tree vectype_in,
+                                   tree *decl, enum tree_code *code1)
+{
+  enum machine_mode m1,m2;
+  convert_optab optab1 = NULL;
+
+  /* First check if we can do the conversion directly.  */
+  if (code == FIX_TRUNC_EXPR)
+    optab1 = (TYPE_UNSIGNED (vectype_out)) ? ufixtrunc_optab : sfixtrunc_optab;
+  else if (code == FLOAT_EXPR)
+    optab1 = (TYPE_UNSIGNED (vectype_in)) ? ufloat_optab : sfloat_optab;
+  
+  m1 = TYPE_MODE (vectype_in);
+  m2 = TYPE_MODE (vectype_out);
+
+  if (convert_optab_handler (optab1, m2, m1) != CODE_FOR_nothing)
+    {
+      *code1 = code;
+      return true;
+    }
+  
+  /* Now check for builtin.  */
+  if (targetm.vectorize.builtin_conversion
+      && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+    {
+      *code1 = CALL_EXPR;
+      *decl = targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in);
+      return true;
+    }
+  return false;
+}
+
 
 /* Check if STMT performs a conversion operation, that can be vectorized.
    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
@@ -1850,7 +1903,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   tree vectype_out, vectype_in;
   int ncopies, j;
   tree rhs_type;
-  tree builtin_decl;
   enum { NARROW, NONE, WIDEN } modifier;
   int i;
   VEC(tree,heap) *vec_oprnds0 = NULL;
@@ -1939,7 +1991,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 
   /* Supportable by target?  */
   if ((modifier == NONE
-       && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+       && !supportable_convert_operation (code, vectype_out, vectype_in, &decl1, &code1))
       || (modifier == WIDEN
 	  && !supportable_widening_operation (code, stmt,
 					      vectype_out, vectype_in,
@@ -1989,19 +2041,28 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 	  else
 	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
 
-	  builtin_decl =
-	    targetm.vectorize.builtin_conversion (code,
-						  vectype_out, vectype_in);
 	  FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
-	    {
-	      /* Arguments are ready. create the new vector stmt.  */
-	      new_stmt = gimple_build_call (builtin_decl, 1, vop0);
-	      new_temp = make_ssa_name (vec_dest, new_stmt);
-	      gimple_call_set_lhs (new_stmt, new_temp);
-	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-	      if (slp_node)
-		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
-	    }
+          {
+            /* Arguments are ready, create the new vector stmt.  */
+            if (code1 == CALL_EXPR)
+              {
+                new_stmt = gimple_build_call (decl1, 1, vop0);
+                new_temp = make_ssa_name (vec_dest, new_stmt);
+                gimple_call_set_lhs (new_stmt, new_temp);
+              }
+            else
+              {
+                gcc_assert (TREE_CODE_LENGTH (code) == unary_op);
+                new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0,
+                                                        NULL);
+                new_temp = make_ssa_name (vec_dest, new_stmt);
+                gimple_assign_set_lhs (new_stmt, new_temp);
+              }
+
+            vect_finish_stmt_generation (stmt, new_stmt, gsi);
+            if (slp_node)
+              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
+          }
 
 	  if (j == 0)
 	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index f22add6..d1d1835 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -818,6 +818,9 @@ extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
                                  bool *, slp_tree, slp_instance);
 extern void vect_remove_stores (gimple);
 extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
+extern bool supportable_convert_operation (enum tree_code, tree, tree,
+                                          tree *, enum tree_code *);
+
 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
                                     tree, int);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
diff --git a/gcc/tree.h b/gcc/tree.h
index 18fdd07..537e54b 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1120,6 +1120,13 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
   (TREE_CODE (TYPE) == COMPLEX_TYPE	\
    && TREE_CODE (TREE_TYPE (TYPE)) == REAL_TYPE)
 
+/* Nonzero if TYPE represents a vector integer type.  */
+                
+#define VECTOR_INTEGER_TYPE_P(TYPE)                   \
+             (TREE_CODE (TYPE) == VECTOR_TYPE      \
+                 && TREE_CODE (TREE_TYPE (TYPE)) == INTEGER_TYPE)
+
+
 /* Nonzero if TYPE represents a vector floating-point type.  */
 
 #define VECTOR_FLOAT_TYPE_P(TYPE)	\
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
new file mode 100644
index 0000000..f33206c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -fdump-tree-vect-details" } */
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+  int i;
+
+  /* int -> float */
+  for (i = 0; i < N; i++)
+    fa[i] = (float) ib[i];
+
+  /* float -> int */
+  for (i = 0; i < N; i++)
+    ia[i] = (int) fa[i];
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
new file mode 100644
index 0000000..3412cf2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -fdump-tree-vect-details -mvectorize-with-neon-quad" } */
+
+#define N 32
+
+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+float fa[N];
+int ia[N];
+
+int convert()
+{
+  int i;
+
+  /* int -> float */
+  for (i = 0; i < N; i++)
+    fa[i] = (float) ib[i];
+
+  /* float -> int */
+  for (i = 0; i < N; i++)
+    ia[i] = (int) fa[i];
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]