This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][ARM] Vectorize bswap[16,32,64] operations


Hi all,

This is the aarch32 version of https://gcc.gnu.org/ml/gcc-patches/2014-04/msg00850.html that allows us to (auto-)vectorise the __builtin_bswap[16,32,64] functions using the vrev instructions. For that we create some new NEON builtins and get arm_builtin_vectorized_function to map to them when asked to vectorise the corresponding builtin.

The tests for this were added with the aarch64 patch mentioned above but were disabled for arm. This patch enables them now that we support the operations (of course they now pass on arm)

Tested arm-none-eabi and bootstrapped on arm-none-linux-gnueabihf.

Ok for trunk?


Thanks,
Kyrill

2014-05-16  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * config/arm/neon.md (neon_bswap<mode>): New pattern.
    * config/arm/arm.c (neon_itype): Add NEON_BSWAP.
    (arm_init_neon_builtins): Handle NEON_BSWAP.
    Define required type nodes.
    (arm_expand_neon_builtin): Handle NEON_BSWAP.
    (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
    * config/arm/arm_neon_builtins.def (bswap): Define builtins.
    * config/arm/iterators.md (VDQHSD): New mode iterator.

2014-05-16  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

    * lib/target-supports.exp (check_effective_target_vect_bswap):
    Specify arm*-*-* support.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1e44080..6d430a2 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -23064,6 +23064,7 @@ typedef enum {
   NEON_BINOP,
   NEON_TERNOP,
   NEON_UNOP,
+  NEON_BSWAP,
   NEON_GETLANE,
   NEON_SETLANE,
   NEON_CREATE,
@@ -23531,14 +23532,19 @@ arm_init_neon_builtins (void)
 
   tree V8QI_type_node;
   tree V4HI_type_node;
+  tree V4UHI_type_node;
   tree V4HF_type_node;
   tree V2SI_type_node;
+  tree V2USI_type_node;
   tree V2SF_type_node;
   tree V16QI_type_node;
   tree V8HI_type_node;
+  tree V8UHI_type_node;
   tree V4SI_type_node;
+  tree V4USI_type_node;
   tree V4SF_type_node;
   tree V2DI_type_node;
+  tree V2UDI_type_node;
 
   tree intUQI_type_node;
   tree intUHI_type_node;
@@ -23634,16 +23640,26 @@ arm_init_neon_builtins (void)
   const_intDI_pointer_node = build_pointer_type (const_intDI_node);
   const_float_pointer_node = build_pointer_type (const_float_node);
 
+  /* Unsigned integer types for various mode sizes.  */
+  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
+  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
+  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
+  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
+  neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
   /* Now create vector types based on our NEON element types.  */
   /* 64-bit vectors.  */
   V8QI_type_node =
     build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
   V4HI_type_node =
     build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
+  V4UHI_type_node =
+    build_vector_type_for_mode (intUHI_type_node, V4HImode);
   V4HF_type_node =
     build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
   V2SI_type_node =
     build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
+  V2USI_type_node =
+    build_vector_type_for_mode (intUSI_type_node, V2SImode);
   V2SF_type_node =
     build_vector_type_for_mode (neon_float_type_node, V2SFmode);
   /* 128-bit vectors.  */
@@ -23651,19 +23667,18 @@ arm_init_neon_builtins (void)
     build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
   V8HI_type_node =
     build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
+  V8UHI_type_node =
+    build_vector_type_for_mode (intUHI_type_node, V8HImode);
   V4SI_type_node =
     build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
+  V4USI_type_node =
+    build_vector_type_for_mode (intUSI_type_node, V4SImode);
   V4SF_type_node =
     build_vector_type_for_mode (neon_float_type_node, V4SFmode);
   V2DI_type_node =
     build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
-
-  /* Unsigned integer types for various mode sizes.  */
-  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
-  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
-  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
-  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
-  neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
+  V2UDI_type_node =
+    build_vector_type_for_mode (intUDI_type_node, V2DImode);
 
 
   (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
@@ -23741,8 +23756,6 @@ arm_init_neon_builtins (void)
 
   if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
   {
-    tree V4USI_type_node =
-      build_vector_type_for_mode (intUSI_type_node, V4SImode);
 
     tree V16UQI_type_node =
       build_vector_type_for_mode (intUQI_type_node, V16QImode);
@@ -24106,6 +24119,31 @@ arm_init_neon_builtins (void)
 	    ftype = build_function_type_list (return_type, eltype, NULL);
 	    break;
 	  }
+	case NEON_BSWAP:
+	{
+	    tree eltype = NULL_TREE;
+	    switch (insn_data[d->code].operand[1].mode)
+	    {
+	      case V4HImode:
+	        eltype = V4UHI_type_node;
+	        break;
+	      case V8HImode:
+	        eltype = V8UHI_type_node;
+	        break;
+	      case V2SImode:
+	        eltype = V2USI_type_node;
+	        break;
+	      case V4SImode:
+	        eltype = V4USI_type_node;
+	        break;
+	      case V2DImode:
+	        eltype = V2UDI_type_node;
+	        break;
+	      default: gcc_unreachable ();
+	    }
+	    ftype = build_function_type_list (eltype, eltype, NULL);
+	    break;
+	}
 	default:
 	  gcc_unreachable ();
 	}
@@ -25280,6 +25318,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
     case NEON_SPLIT:
     case NEON_FLOAT_WIDEN:
     case NEON_FLOAT_NARROW:
+    case NEON_BSWAP:
     case NEON_REINTERP:
       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
@@ -29783,8 +29822,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
   int in_n, out_n;
 
   if (TREE_CODE (type_out) != VECTOR_TYPE
-      || TREE_CODE (type_in) != VECTOR_TYPE
-      || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
+      || TREE_CODE (type_in) != VECTOR_TYPE)
     return NULL_TREE;
 
   out_mode = TYPE_MODE (TREE_TYPE (type_out));
@@ -29796,7 +29834,13 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
    decl of the vectorized builtin for the appropriate vector mode.
    NULL_TREE is returned if no such builtin is available.  */
 #undef ARM_CHECK_BUILTIN_MODE
-#define ARM_CHECK_BUILTIN_MODE(C) \
+#define ARM_CHECK_BUILTIN_MODE(C)    \
+  (TARGET_NEON && TARGET_FPU_ARMV8   \
+   && flag_unsafe_math_optimizations \
+   && ARM_CHECK_BUILTIN_MODE_1 (C))
+
+#undef ARM_CHECK_BUILTIN_MODE_1
+#define ARM_CHECK_BUILTIN_MODE_1(C) \
   (out_mode == SFmode && out_n == C \
    && in_mode == SFmode && in_n == C)
 
@@ -29821,6 +29865,30 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
             return ARM_FIND_VRINT_VARIANT (vrintz);
           case BUILT_IN_ROUNDF:
             return ARM_FIND_VRINT_VARIANT (vrinta);
+#undef ARM_CHECK_BUILTIN_MODE
+#define ARM_CHECK_BUILTIN_MODE(C, N) \
+  (out_mode == N##Imode && out_n == C \
+   && in_mode == N##Imode && in_n == C)
+          case BUILT_IN_BSWAP16:
+            if (ARM_CHECK_BUILTIN_MODE (4, H))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
+            else if (ARM_CHECK_BUILTIN_MODE (8, H))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
+            else
+              return NULL_TREE;
+          case BUILT_IN_BSWAP32:
+            if (ARM_CHECK_BUILTIN_MODE (2, S))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
+            else if (ARM_CHECK_BUILTIN_MODE (4, S))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
+            else
+              return NULL_TREE;
+          case BUILT_IN_BSWAP64:
+            if (ARM_CHECK_BUILTIN_MODE (2, D))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
+            else
+              return NULL_TREE;
+
           default:
             return NULL_TREE;
         }
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 85215b5..7ca4ae1 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -88,6 +88,7 @@ VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di),
 VAR2 (UNOP, vcnt, v8qi, v16qi),
 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index aebab93..892e5f3 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -116,6 +116,9 @@
 ;; Vector modes including 64-bit integer elements, but no floats.
 (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
 
+;; Vector modes for H, S and D types.
+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
+
 ;; Vector modes for float->int conversions.
 (define_mode_iterator VCVTF [V2SF V4SF])
 
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 9ac393b..d0a60f8 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2547,6 +2547,14 @@
   [(set_attr "type" "neon_qabs<q>")]
 )
 
+(define_insn "neon_bswap<mode>"
+  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
+  "TARGET_NEON"
+  "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "type" "neon_rev<q>")]
+)
+
 (define_expand "neon_vneg<mode>"
   [(match_operand:VDQW 0 "s_register_operand" "")
    (match_operand:VDQW 1 "s_register_operand" "")
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index ef370fe..7e1ec71 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3280,7 +3280,8 @@ proc check_effective_target_vect_bswap { } {
 	verbose "check_effective_target_vect_bswap: using cached result" 2
     } else {
 	set et_vect_bswap_saved 0
-	if { [istarget aarch64*-*-*] } {
+	if { [istarget aarch64*-*-*]
+	     || [istarget arm*-*-*] } {
 	   set et_vect_bswap_saved 1
 	}
     }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]