This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH: PR target/40697] Use shifts to extract most or least significant bits


This patch is to implement optimization suggested by

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40416

Gcc transforms the expression extracting most or least significant bits into
an AND with a mask. Thumb1 ISA doesn't contain an AND with immediate
instruction, so first it needs to load a constant then it AND two registers to
get the result. Depending on the constant, the code sequence needs two or more
instructions and may contain a memory access and a constant in memory.

Instead two shifts can be used to do the same thing. It is always better or
same as AND with immediate in terms of both size and speed.


ChangeLog:
2009-07-14  Wei Guozhi  <carrot@google.com>

        PR target/40416
        * config/arm/arm.c (all_ones_in_most_significant_bits): New function.
        (all_ones_in_least_significant_bits): New function.
        * config/arm/arm-protos.h (all_ones_in_most_significant_bits):
		Declare it.
		(all_ones_in_least_significant_bits): Declare it.
        * config/arm/arm.md: Add peephole2 rules to do the optimization.
		
2009-07-14  Wei Guozhi  <carrot@google.com>

        PR target/40416
        * gcc.target/arm/pr40697-1.c: New testcase.
        * gcc.target/arm/pr40697-2.c: New testcase.
	
	
Index: arm.c
===================================================================
--- arm.c       (revision 149523)
+++ arm.c       (working copy)
@@ -20038,4 +20038,45 @@ arm_frame_pointer_required (void)
           || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
 }

+/* If all 1s in word i are in most significant bits returns the number of
+   1s. Otherwise returns 0.  */
+int
+all_ones_in_most_significant_bits (HOST_WIDE_INT i)
+{
+  int ones = 32;
+  unsigned HOST_WIDE_INT mask = 0xffffffff;
+  i &= mask;
+
+  while (mask)
+  {
+    if (i == mask)
+      return ones;
+    mask <<= 1;
+    mask &= (unsigned HOST_WIDE_INT) 0xffffffff;
+    ones--;
+  }
+
+  return 0;
+}
+
+/* If all 1s in word i are in least significant bits returns the number of
+   1s. Otherwise returns 0.  */
+int
+all_ones_in_least_significant_bits (HOST_WIDE_INT i)
+{
+  int ones = 32;
+  unsigned HOST_WIDE_INT mask = 0xffffffff;
+  i &= mask;
+
+  while (mask)
+  {
+    if (i == mask)
+      return ones;
+    mask >>= 1;
+    ones--;
+  }
+
+  return 0;
+}
+
 #include "gt-arm.h"


Index: arm-protos.h
===================================================================
--- arm-protos.h        (revision 149523)
+++ arm-protos.h        (working copy)
@@ -44,6 +44,8 @@ extern void arm_output_fn_unwind (FILE *


 #ifdef RTX_CODE
+extern int all_ones_in_most_significant_bits (HOST_WIDE_INT);
+extern int all_ones_in_least_significant_bits (HOST_WIDE_INT);
 extern bool arm_vector_mode_supported_p (enum machine_mode);
 extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);


Index: arm.md
===================================================================
--- arm.md      (revision 149523)
+++ arm.md      (working copy)
@@ -1955,6 +1955,46 @@
   "
 )

+;; Thumb1 doesn't have the and with an immediate instruction.
+;; If all 1s in the immediate are in one end of the word, we can
+;; simplify it as two shifts.
+(define_peephole2
+  [(set (match_operand:SI 0 "arm_general_register_operand" "")
+        (match_operand:SI 1 "const_int_operand" ""))
+   (set (match_operand:SI 2 "arm_general_register_operand" "")
+        (and:SI (match_operand:SI 3 "arm_general_register_operand" "")
+                (match_dup 0)))]
+  "TARGET_THUMB1
+  && all_ones_in_most_significant_bits (INTVAL (operands[1])) > 0"
+  ; To be conservative we must keep the insn that set operand[0] because
+  ; we don't know if it will be used by other insns. Usually it can be
+  ; removed by a later dead code elimination pass.
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (lshiftrt:SI (match_dup 3) (match_dup 4)))
+   (set (match_dup 2) (ashift:SI (match_dup 2) (match_dup 4)))]
+  "{
+     int ones = all_ones_in_most_significant_bits (INTVAL (operands[1]));
+     operands[4] = GEN_INT (32 - ones);
+  }"
+)
+
+(define_peephole2
+  [(set (match_operand:SI 0 "arm_general_register_operand" "")
+        (match_operand:SI 1 "const_int_operand" ""))
+   (set (match_operand:SI 2 "arm_general_register_operand" "")
+        (and:SI (match_operand:SI 3 "arm_general_register_operand" "")
+                (match_dup 0)))]
+  "TARGET_THUMB1
+  && all_ones_in_least_significant_bits (INTVAL (operands[1])) > 0"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (ashift:SI (match_dup 3) (match_dup 4)))
+   (set (match_dup 2) (lshiftrt:SI (match_dup 2) (match_dup 4)))]
+  "{
+     int ones = all_ones_in_least_significant_bits (INTVAL (operands[1]));
+     operands[4] = GEN_INT (32 - ones);
+  }"
+)
+
 ; ??? Check split length for Thumb-2
 (define_insn_and_split "*arm_andsi3_insn"
   [(set (match_operand:SI         0 "s_register_operand" "=r,r,r")


Index: pr40697-1.c
===================================================================
--- pr40697-1.c (revision 0)
+++ pr40697-1.c (revision 0)
@@ -0,0 +1,7 @@
+/* { dg-options "-mthumb -Os -march=armv5te" }  */
+/* { dg-final { scan-assembler-not "ldr" } } */
+
+unsigned get_least_bits(unsigned value)
+{
+  return value << 9 >> 9;
+}


Index: pr40697-2.c
===================================================================
--- pr40697-2.c (revision 0)
+++ pr40697-2.c (revision 0)
@@ -0,0 +1,7 @@
+/* { dg-options "-mthumb -Os -march=armv5te" }  */
+/* { dg-final { scan-assembler-not "ldr" } } */
+
+unsigned get_most_bits(unsigned value)
+{
+  return value >> 9 << 9;
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]