This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH: PR target/40697] Use shifts to extract most or least significant bits
Thank you, Paolo
On Tue, Jul 14, 2009 at 1:38 PM, Paolo Bonzini<bonzini@gnu.org> wrote:
>
>> +/* If all 1s in word i are in most significant bits returns the number of
>> + ? 1s. Otherwise returns 0. ?*/
>> +int
>> +all_ones_in_most_significant_bits (HOST_WIDE_INT i)
>
> return (i | (i - 1)) == ~0;
>
> 1111110000000000 |
> 1111101111111111 =
> 1111111111111111
>
>> +/* If all 1s in word i are in least significant bits returns the number
>> of
>> + ? 1s. Otherwise returns 0. ?*/
>> +int
>> +all_ones_in_least_significant_bits (HOST_WIDE_INT i)
>
> return (i & (i + 1)) == 0;
>
> 00000000001111111 &
> 00000000010000000 =
> 00000000000000000
>
These two functions need to return the number of 1s in the given word,
so I can compute the amount to shift later.
>> +&& ?all_ones_in_most_significant_bits (INTVAL (operands[1]))> ?0"
>
> No > 0, no two spaces before "all", one space to indent correctly.
done.
>
>> +&& ?all_ones_in_least_significant_bits (INTVAL (operands[1]))> ?0"
>
> No > 0, no two spaces before "all", one space to indent correctly.
done.
It's strange that I can see two spaces before "all" in your replied email,
but I can only see one in my email and source code.
>
> Does it help if you do the same optimization on QI and HI modes too?
I just tested this function and it can also be optimized
unsigned short least_bits_of_short(unsigned short value)
{
return value & 0x07FF;
}
For QI mode the constant loading needs only 1 mov instruction.
It's not important to transform it or not.
>
>> Index: pr40697-1.c
>> ===================================================================
>> --- pr40697-1.c (revision 0)
>> +++ pr40697-1.c (revision 0)
>> @@ -0,0 +1,7 @@
>> +/* { dg-options "-mthumb -Os -march=armv5te" } ?*/
>> +/* { dg-final { scan-assembler-not "ldr" } } */
>> +
>> +unsigned get_least_bits(unsigned value)
>> +{
>> + ?return value<< ?9>> ?9;
>> +}
>
> Please do the same test also with "value & 0x007FFFFF"...
done.
>
>>
>> Index: pr40697-2.c
>> ===================================================================
>> --- pr40697-2.c (revision 0)
>> +++ pr40697-2.c (revision 0)
>> @@ -0,0 +1,7 @@
>> +/* { dg-options "-mthumb -Os -march=armv5te" } ?*/
>> +/* { dg-final { scan-assembler-not "ldr" } } */
>> +
>> +unsigned get_most_bits(unsigned value)
>> +{
>> + ?return value>> ?9<< ?9;
>> +}
>
> And with "value & 0x1FF". ?You can place each pair of tests in one file.
done. I guess you mean "value & 0xFFFFFE00" to get most significant bits.
>
> Paolo
>
Test:
This patch was applied to trunk GCC and tested on the arm emulator with newlib.
No new failure.
ChangeLog:
2009-07-14 Wei Guozhi <carrot@google.com>
PR target/40416
* config/arm/arm.c (all_ones_in_most_significant_bits): New function.
(all_ones_in_least_significant_bits): New function.
* config/arm/arm-protos.h (all_ones_in_most_significant_bits):
Declare it.
(all_ones_in_least_significant_bits): Declare it.
* config/arm/arm.md: Add peephole2 rules to do the optimization.
2009-07-14 Wei Guozhi <carrot@google.com>
PR target/40416
* gcc.target/arm/pr40697-1.c: New testcase.
* gcc.target/arm/pr40697-2.c: New testcase.
Index: arm.c
===================================================================
--- arm.c (revision 149523)
+++ arm.c (working copy)
@@ -20038,4 +20038,45 @@ arm_frame_pointer_required (void)
|| (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
+/* If all 1s in word i are in most significant bits returns the number of
+ 1s. Otherwise returns 0. */
+int
+all_ones_in_most_significant_bits (HOST_WIDE_INT i)
+{
+ int ones = 32;
+ unsigned HOST_WIDE_INT mask = 0xffffffff;
+ i &= mask;
+
+ while (mask)
+ {
+ if (i == mask)
+ return ones;
+ mask <<= 1;
+ mask &= (unsigned HOST_WIDE_INT) 0xffffffff;
+ ones--;
+ }
+
+ return 0;
+}
+
+/* If all 1s in word i are in least significant bits returns the number of
+ 1s. Otherwise returns 0. */
+int
+all_ones_in_least_significant_bits (HOST_WIDE_INT i)
+{
+ int ones = 32;
+ unsigned HOST_WIDE_INT mask = 0xffffffff;
+ i &= mask;
+
+ while (mask)
+ {
+ if (i == mask)
+ return ones;
+ mask >>= 1;
+ ones--;
+ }
+
+ return 0;
+}
+
#include "gt-arm.h"
Index: arm-protos.h
===================================================================
--- arm-protos.h (revision 149523)
+++ arm-protos.h (working copy)
@@ -44,6 +44,8 @@ extern void arm_output_fn_unwind (FILE *
#ifdef RTX_CODE
+extern int all_ones_in_most_significant_bits (HOST_WIDE_INT);
+extern int all_ones_in_least_significant_bits (HOST_WIDE_INT);
extern bool arm_vector_mode_supported_p (enum machine_mode);
extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
extern int const_ok_for_arm (HOST_WIDE_INT);
Index: arm.md
===================================================================
--- arm.md (revision 149523)
+++ arm.md (working copy)
@@ -1955,6 +1955,46 @@
"
)
+;; Thumb1 doesn't have the and with an immediate instruction.
+;; If all 1s in the immediate are in one end of the word, we can
+;; simplify it as two shifts.
+(define_peephole2
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arm_general_register_operand" "")
+ (and:SI (match_operand:SI 3 "arm_general_register_operand" "")
+ (match_dup 0)))]
+ "TARGET_THUMB1
+ && all_ones_in_most_significant_bits (INTVAL (operands[1]))"
+ ; To be conservative we must keep the insn that set operand[0] because
+ ; we don't know if it will be used by other insns. Usually it can be
+ ; removed by a later dead code elimination pass.
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (lshiftrt:SI (match_dup 3) (match_dup 4)))
+ (set (match_dup 2) (ashift:SI (match_dup 2) (match_dup 4)))]
+ "{
+ int ones = all_ones_in_most_significant_bits (INTVAL (operands[1]));
+ operands[4] = GEN_INT (32 - ones);
+ }"
+)
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arm_general_register_operand" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arm_general_register_operand" "")
+ (and:SI (match_operand:SI 3 "arm_general_register_operand" "")
+ (match_dup 0)))]
+ "TARGET_THUMB1
+ && all_ones_in_least_significant_bits (INTVAL (operands[1]))"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (ashift:SI (match_dup 3) (match_dup 4)))
+ (set (match_dup 2) (lshiftrt:SI (match_dup 2) (match_dup 4)))]
+ "{
+ int ones = all_ones_in_least_significant_bits (INTVAL (operands[1]));
+ operands[4] = GEN_INT (32 - ones);
+ }"
+)
+
; ??? Check split length for Thumb-2
(define_insn_and_split "*arm_andsi3_insn"
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
Index: pr40697-1.c
===================================================================
--- pr40697-1.c (revision 0)
+++ pr40697-1.c (revision 0)
@@ -0,0 +1,12 @@
+/* { dg-options "-mthumb -Os -march=armv5te" } */
+/* { dg-final { scan-assembler-not "ldr" } } */
+
+unsigned get_least_bits(unsigned value)
+{
+ return value << 9 >> 9;
+}
+
+unsigned get_least_bits_1(unsigned value)
+{
+ return value & 0x007FFFFF;
+}
Index: pr40697-2.c
===================================================================
--- pr40697-2.c (revision 0)
+++ pr40697-2.c (revision 0)
@@ -0,0 +1,12 @@
+/* { dg-options "-mthumb -Os -march=armv5te" } */
+/* { dg-final { scan-assembler-not "ldr" } } */
+
+unsigned get_most_bits(unsigned value)
+{
+ return value >> 9 << 9;
+}
+
+unsigned get_most_bits_1(unsigned value)
+{
+ return value & 0xFFFFFE00;
+}