This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH: PR target/40697] Use shifts to extract most or least significant bits


Thank you, Paolo

On Tue, Jul 14, 2009 at 1:38 PM, Paolo Bonzini<bonzini@gnu.org> wrote:
>
>> +/* If all 1s in word i are in most significant bits returns the number of
>> + ? 1s. Otherwise returns 0. ?*/
>> +int
>> +all_ones_in_most_significant_bits (HOST_WIDE_INT i)
>
> return (i | (i - 1)) == ~0;
>
> 1111110000000000 |
> 1111101111111111 =
> 1111111111111111
>
>> +/* If all 1s in word i are in least significant bits returns the number
>> of
>> + ? 1s. Otherwise returns 0. ?*/
>> +int
>> +all_ones_in_least_significant_bits (HOST_WIDE_INT i)
>
> return (i & (i + 1)) == 0;
>
> 00000000001111111 &
> 00000000010000000 =
> 00000000000000000
>
These two functions need to return the number of 1s in the given word,
so I can compute the amount to shift later.

>> +&& ?all_ones_in_most_significant_bits (INTVAL (operands[1]))> ?0"
>
> No > 0, no two spaces before "all", one space to indent correctly.
done.
>
>> +&& ?all_ones_in_least_significant_bits (INTVAL (operands[1]))> ?0"
>
> No > 0, no two spaces before "all", one space to indent correctly.
done.
It's strange that I can see two spaces before "all" in your replied email,
but I can only see one in my email and source code.
>
> Does it help if you do the same optimization on QI and HI modes too?
I just tested this function and it can also be optimized

unsigned short least_bits_of_short(unsigned short value)
{
  return value & 0x07FF;
}

For QI mode the constant loading needs only 1 mov instruction.
It's not important to transform it or not.

>
>> Index: pr40697-1.c
>> ===================================================================
>> --- pr40697-1.c (revision 0)
>> +++ pr40697-1.c (revision 0)
>> @@ -0,0 +1,7 @@
>> +/* { dg-options "-mthumb -Os -march=armv5te" } ?*/
>> +/* { dg-final { scan-assembler-not "ldr" } } */
>> +
>> +unsigned get_least_bits(unsigned value)
>> +{
>> + ?return value<< ?9>> ?9;
>> +}
>
> Please do the same test also with "value & 0x007FFFFF"...
done.
>
>>
>> Index: pr40697-2.c
>> ===================================================================
>> --- pr40697-2.c (revision 0)
>> +++ pr40697-2.c (revision 0)
>> @@ -0,0 +1,7 @@
>> +/* { dg-options "-mthumb -Os -march=armv5te" } ?*/
>> +/* { dg-final { scan-assembler-not "ldr" } } */
>> +
>> +unsigned get_most_bits(unsigned value)
>> +{
>> + ?return value>> ?9<< ?9;
>> +}
>
> And with "value & 0x1FF". ?You can place each pair of tests in one file.
done. I guess you mean "value & 0xFFFFFE00" to get most significant bits.

>
> Paolo
>

Test:
This patch was applied to trunk GCC and tested on the arm emulator with newlib.
No new failure.

ChangeLog:
2009-07-14  Wei Guozhi  <carrot@google.com>

        PR target/40416
        * config/arm/arm.c (all_ones_in_most_significant_bits): New function.
        (all_ones_in_least_significant_bits): New function.
        * config/arm/arm-protos.h (all_ones_in_most_significant_bits):
		Declare it.
		(all_ones_in_least_significant_bits): Declare it.
        * config/arm/arm.md: Add peephole2 rules to do the optimization.
		
2009-07-14  Wei Guozhi  <carrot@google.com>

        PR target/40416
        * gcc.target/arm/pr40697-1.c: New testcase.
        * gcc.target/arm/pr40697-2.c: New testcase.
	
	
Index: arm.c
===================================================================
--- arm.c       (revision 149523)
+++ arm.c       (working copy)
@@ -20038,4 +20038,45 @@ arm_frame_pointer_required (void)
           || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
 }

+/* If all 1s in word i are in most significant bits returns the number of
+   1s. Otherwise returns 0.  */
+int
+all_ones_in_most_significant_bits (HOST_WIDE_INT i)
+{
+  int ones = 32;
+  unsigned HOST_WIDE_INT mask = 0xffffffff;
+  i &= mask;
+
+  while (mask)
+  {
+    if (i == mask)
+      return ones;
+    mask <<= 1;
+    mask &= (unsigned HOST_WIDE_INT) 0xffffffff;
+    ones--;
+  }
+
+  return 0;
+}
+
+/* If all 1s in word i are in least significant bits returns the number of
+   1s. Otherwise returns 0.  */
+int
+all_ones_in_least_significant_bits (HOST_WIDE_INT i)
+{
+  int ones = 32;
+  unsigned HOST_WIDE_INT mask = 0xffffffff;
+  i &= mask;
+
+  while (mask)
+  {
+    if (i == mask)
+      return ones;
+    mask >>= 1;
+    ones--;
+  }
+
+  return 0;
+}
+
 #include "gt-arm.h"


Index: arm-protos.h
===================================================================
--- arm-protos.h        (revision 149523)
+++ arm-protos.h        (working copy)
@@ -44,6 +44,8 @@ extern void arm_output_fn_unwind (FILE *


 #ifdef RTX_CODE
+extern int all_ones_in_most_significant_bits (HOST_WIDE_INT);
+extern int all_ones_in_least_significant_bits (HOST_WIDE_INT);
 extern bool arm_vector_mode_supported_p (enum machine_mode);
 extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);


Index: arm.md
===================================================================
--- arm.md      (revision 149523)
+++ arm.md      (working copy)
@@ -1955,6 +1955,46 @@
   "
 )

+;; Thumb1 doesn't have the and with an immediate instruction.
+;; If all 1s in the immediate are in one end of the word, we can
+;; simplify it as two shifts.
+(define_peephole2
+  [(set (match_operand:SI 0 "arm_general_register_operand" "")
+        (match_operand:SI 1 "const_int_operand" ""))
+   (set (match_operand:SI 2 "arm_general_register_operand" "")
+        (and:SI (match_operand:SI 3 "arm_general_register_operand" "")
+                (match_dup 0)))]
+  "TARGET_THUMB1
+  && all_ones_in_most_significant_bits (INTVAL (operands[1]))"
+  ; To be conservative we must keep the insn that set operand[0] because
+  ; we don't know if it will be used by other insns. Usually it can be
+  ; removed by a later dead code elimination pass.
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (lshiftrt:SI (match_dup 3) (match_dup 4)))
+   (set (match_dup 2) (ashift:SI (match_dup 2) (match_dup 4)))]
+  "{
+     int ones = all_ones_in_most_significant_bits (INTVAL (operands[1]));
+     operands[4] = GEN_INT (32 - ones);
+  }"
+)
+
+(define_peephole2
+  [(set (match_operand:SI 0 "arm_general_register_operand" "")
+        (match_operand:SI 1 "const_int_operand" ""))
+   (set (match_operand:SI 2 "arm_general_register_operand" "")
+        (and:SI (match_operand:SI 3 "arm_general_register_operand" "")
+                (match_dup 0)))]
+  "TARGET_THUMB1
+  && all_ones_in_least_significant_bits (INTVAL (operands[1]))"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (ashift:SI (match_dup 3) (match_dup 4)))
+   (set (match_dup 2) (lshiftrt:SI (match_dup 2) (match_dup 4)))]
+  "{
+     int ones = all_ones_in_least_significant_bits (INTVAL (operands[1]));
+     operands[4] = GEN_INT (32 - ones);
+  }"
+)
+
 ; ??? Check split length for Thumb-2
 (define_insn_and_split "*arm_andsi3_insn"
   [(set (match_operand:SI         0 "s_register_operand" "=r,r,r")


Index: pr40697-1.c
===================================================================
--- pr40697-1.c (revision 0)
+++ pr40697-1.c (revision 0)
@@ -0,0 +1,12 @@
+/* { dg-options "-mthumb -Os -march=armv5te" }  */
+/* { dg-final { scan-assembler-not "ldr" } } */
+
+unsigned get_least_bits(unsigned value)
+{
+  return value << 9 >> 9;
+}
+
+unsigned get_least_bits_1(unsigned value)
+{
+  return value & 0x007FFFFF;
+}


Index: pr40697-2.c
===================================================================
--- pr40697-2.c (revision 0)
+++ pr40697-2.c (revision 0)
@@ -0,0 +1,12 @@
+/* { dg-options "-mthumb -Os -march=armv5te" }  */
+/* { dg-final { scan-assembler-not "ldr" } } */
+
+unsigned get_most_bits(unsigned value)
+{
+  return value >> 9 << 9;
+}
+
+unsigned get_most_bits_1(unsigned value)
+{
+  return value & 0xFFFFFE00;
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]