This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch ARM] Fix bswap patterns for ARM / Thumb and Thumb2.


Hi, 

This patch by Stephen Thomas fixes code generation for bswap operations
on the ARM port. As a result, bswap64 reduces to 3 instructions rather
than 91 for v7-a!

This supports use of the rev instruction on architectures that support
it (v6 onwards). On older cores such as v5te, when not optimizing for
size, it uses a smaller instruction sequence than what is generated by
default, for ARM as well as Thumb1. When optimizing for size on those
cores, it generates a libcall to bswapsi2 instead.

This was cross-tested on qemu for cpu=arm7tdmi and cpu=cortex-a9 for
arm-eabi, with multilibs for Thumb1 and Thumb2, with no regressions.

OK to commit to trunk?

cheers
Ramana



2010-01-21 Stephen Thomas <stephen.thomas@arm.com>

	* config/arm/arm.md (bswapsi2): Add support for bswapsi2.
	(arm_rev): New.
	(arm_legacy_rev): Likewise.
	(thumb_legacy_rev): Likewise.


2010-01-21 Stephen Thomas <stephen.thomas@arm.com>

	* testsuite/gcc.dg/optimize-bswap*.c: Add ARM target.


Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 156205)
+++ gcc/config/arm/arm.md	(working copy)
@@ -11194,6 +11194,107 @@
    (set_attr "length" "4")]
 )
 
+(define_insn "arm_rev"  ;; byte-swap (bswap:SI) of one SImode register into another, as a single rev
+  [(set (match_operand:SI 0 "s_register_operand" "=r")  ;; operand 0: destination register
+	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]  ;; operand 1: source register
+  "TARGET_EITHER && arm_arch6"  ;; enabled only when arm_arch6 is set
+  "rev\t%0, %1"
+  [(set (attr "length")  ;; insn length in bytes, chosen from the is_thumb attribute
+        (if_then_else (eq_attr "is_thumb" "yes")
+		      (const_int 2)  ;; NOTE(review): assumes the 2-byte Thumb encoding is usable for any "r" register -- confirm for high registers
+		      (const_int 4)))]  ;; not Thumb: 4 bytes
+)
+
+(define_expand "arm_legacy_rev"  ;; byte-swap of operand 1 into operand 0 without rev; operands 2 and 3 are temporaries
+  [(set (match_operand:SI 2 "s_register_operand" "")  ;; op2 = ror (op1, 16) ^ op1
+	(xor:SI (rotatert:SI (match_operand:SI 1 "s_register_operand" "")
+			     (const_int 16))
+		(match_dup 1)))
+   (set (match_dup 2)  ;; op2 = op2 >> 8 (logical shift)
+	(lshiftrt:SI (match_dup 2)
+		     (const_int 8)))
+   (set (match_operand:SI 3 "s_register_operand" "")  ;; op3 = ror (op1, 8)
+	(rotatert:SI (match_dup 1)
+		     (const_int 8)))
+   (set (match_dup 2)  ;; op2 = op2 & 0xffff00ff, i.e. clear bits 8..15
+	(and:SI (match_dup 2)
+		(const_int -65281)))  ;; -65281 is 0xffff00ff as a 32-bit value
+   (set (match_operand:SI 0 "s_register_operand" "")  ;; op0 = op3 ^ op2, the byte-reversed value
+	(xor:SI (match_dup 3)
+		(match_dup 2)))]
+  "TARGET_32BIT"  ;; expander condition
+  ""  ;; no preparation code: the five sets above are emitted as written
+)
+
+;; Byte-swap operand 1 into operand 0 using only shifts, rotates and ORs.  Reuse temporaries (operands 2-5) to keep register pressure down.
+(define_expand "thumb_legacy_rev"
+  [(set (match_operand:SI 2 "s_register_operand" "")  ;; op2 = op1 << 24 (lowest byte moved to the top)
+     (ashift:SI (match_operand:SI 1 "s_register_operand" "")
+                (const_int 24)))
+   (set (match_operand:SI 3 "s_register_operand" "")  ;; op3 = op1 >> 24 (top byte moved to the bottom)
+     (lshiftrt:SI (match_dup 1)
+		  (const_int 24)))
+   (set (match_dup 3)  ;; op3 |= op2: the two outer bytes of op1, exchanged; middle bytes zero
+     (ior:SI (match_dup 3)
+	     (match_dup 2)))
+   (set (match_operand:SI 4 "s_register_operand" "")  ;; op4 = 16, the rotate count, held in a register
+     (const_int 16))
+   (set (match_operand:SI 5 "s_register_operand" "")  ;; op5 = ror (op1, op4): brings the middle bytes to the outer positions
+     (rotatert:SI (match_dup 1)
+		  (match_dup 4)))
+   (set (match_dup 2)  ;; op2 = op5 << 24 (op2 is reused as scratch)
+     (ashift:SI (match_dup 5)
+                (const_int 24)))
+   (set (match_dup 5)  ;; op5 = op5 >> 24
+     (lshiftrt:SI (match_dup 5)
+		  (const_int 24)))
+   (set (match_dup 5)  ;; op5 |= op2: the two middle bytes of op1, exchanged, sitting in the outer positions
+     (ior:SI (match_dup 5)
+	     (match_dup 2)))
+   (set (match_dup 5)  ;; op5 = ror (op5, op4): move them back into the middle positions
+     (rotatert:SI (match_dup 5)
+		  (match_dup 4)))
+   (set (match_operand:SI 0 "s_register_operand" "")  ;; op0 = op5 | op3: all four bytes reversed
+     (ior:SI (match_dup 5)
+             (match_dup 3)))]
+  "TARGET_THUMB"  ;; expander condition
+  ""  ;; no preparation code: the sets above are emitted as written
+)
+
+(define_expand "bswapsi2"  ;; expander for an SImode byte swap; the C body below picks the strategy
+  [(set (match_operand:SI 0 "s_register_operand" "=r")  ;; operand 0: destination register
+  	(bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]  ;; operand 1: source; this template is what is emitted when the C body neither DONEs nor FAILs (arm_arch6 set)
+"TARGET_EITHER"  ;; without arm_arch6: if !optimize_size emit thumb_legacy_rev (TARGET_THUMB) or arm_legacy_rev with fresh temporaries and DONE, otherwise FAIL
+"
+  if (!arm_arch6)
+    {
+      if (!optimize_size)
+	{
+	  rtx op2 = gen_reg_rtx (SImode);
+	  rtx op3 = gen_reg_rtx (SImode);
+
+	  if (TARGET_THUMB)
+	    {
+	      rtx op4 = gen_reg_rtx (SImode);
+	      rtx op5 = gen_reg_rtx (SImode);
+
+	      emit_insn (gen_thumb_legacy_rev (operands[0], operands[1],
+					       op2, op3, op4, op5));
+	    }
+	  else
+	    {
+	      emit_insn (gen_arm_legacy_rev (operands[0], operands[1],
+					     op2, op3));
+	    }
+
+	  DONE;
+	}
+      else
+	FAIL;
+    }
+  "
+)
+
 ;; Load the FPA co-processor patterns
 (include "fpa.md")
 ;; Load the Maverick co-processor patterns
Index: gcc/testsuite/gcc.dg/optimize-bswapdi-1.c
===================================================================
--- gcc/testsuite/gcc.dg/optimize-bswapdi-1.c	(revision 156205)
+++ gcc/testsuite/gcc.dg/optimize-bswapdi-1.c	(working copy)
@@ -1,4 +1,4 @@
-/* { dg-do compile { target alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* powerpc*-*-* rs6000-*-* } } */
+/* { dg-do compile { target arm*-*-* alpha*-*-* ia64*-*-* x86_64-*-* s390x-*-* powerpc*-*-* rs6000-*-* } } */
 /* { dg-require-effective-target stdint_types } */
 /* { dg-require-effective-target lp64 } */
 /* { dg-options "-O2 -fdump-tree-bswap" } */
Index: gcc/testsuite/gcc.dg/optimize-bswapsi-1.c
===================================================================
--- gcc/testsuite/gcc.dg/optimize-bswapsi-1.c	(revision 156205)
+++ gcc/testsuite/gcc.dg/optimize-bswapsi-1.c	(working copy)
@@ -1,4 +1,4 @@
-/* { dg-do compile { target alpha*-*-* i?86-*-* powerpc*-*-* rs6000-*-* x86_64-*-* s390*-*-* } } */
+/* { dg-do compile { target arm*-*-* alpha*-*-* i?86-*-* powerpc*-*-* rs6000-*-* x86_64-*-* s390*-*-* } } */
 /* { dg-require-effective-target stdint_types } */
 /* { dg-options "-O2 -fdump-tree-bswap" } */
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]