[gcc r13-3179] pru: Optimize DI shifts
Dimitar Dimitrov
dimitar@gcc.gnu.org
Sun Oct 9 11:39:51 GMT 2022
https://gcc.gnu.org/g:73137f365a01327ae245fa0a9f0b127cb06e0cb3
commit r13-3179-g73137f365a01327ae245fa0a9f0b127cb06e0cb3
Author: Dimitar Dimitrov <dimitar@dinux.eu>
Date: Thu Sep 22 23:08:43 2022 +0300
pru: Optimize DI shifts
If the number of shift positions is a constant, then the DI shift
operation is expanded to a sequence of 2 to 4 machine instructions.
That is more efficient than the default action to call libgcc.
gcc/ChangeLog:
* config/pru/pru.md (lshrdi3): New expand pattern.
(ashldi3): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/pru/ashiftdi-1.c: New test.
* gcc.target/pru/lshiftrtdi-1.c: New test.
Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
Diff:
---
gcc/config/pru/pru.md | 196 ++++++++++++++++++++++++++++
gcc/testsuite/gcc.target/pru/ashiftdi-1.c | 53 ++++++++
gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c | 53 ++++++++
3 files changed, 302 insertions(+)
diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index 144cd35d809..53ffff07708 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -703,6 +703,202 @@
[(set_attr "type" "alu")
(set_attr "length" "12")])
+
+; 64-bit LSHIFTRT with a constant shift count can be expanded into a
+; more efficient code sequence than a variable register shift.
+;
+; 1. For shift >= 32:
+; dst_lo = (src_hi >> (shift - 32))
+; dst_hi = 0
+;
+; 2. For shift==1 there is no need for a temporary:
+; dst_lo = (src_lo >> 1)
+; if (src_hi & 1)
+; dst_lo |= (1 << 31)
+; dst_hi = (src_hi >> 1)
+;
+; 3. For shift < 32:
+; dst_lo = (src_lo >> shift)
+; tmp = (src_hi << (32 - shift))
+; dst_lo |= tmp
+; dst_hi = (src_hi >> shift)
+;
+; 4. For shift in a register:
+; Fall back to calling libgcc.
+(define_expand "lshrdi3"
+  [(set (match_operand:DI 0 "register_operand")
+        (lshiftrt:DI
+          (match_operand:DI 1 "register_operand")
+          (match_operand:QI 2 "const_int_operand")))]
+  ""
+{
+  /* Expand a DImode logical right shift by a constant count into SImode
+     operations on the low and high halves of the operands.  */
+  gcc_assert (CONST_INT_P (operands[2]));
+
+  const int nshifts = INTVAL (operands[2]);
+
+  /* Only counts from 1 to 63 can be split as done below.  A count of 64
+     or more would request an SImode shift by (nshifts - 32) >= 32, and a
+     count of zero or less would request one by (32 - nshifts) >= 32.
+     Decline those and leave them to the default (libgcc) handling.  */
+  if (nshifts < 1 || nshifts > 63)
+    FAIL;
+
+  /* SImode halves of the DImode operands, at byte offsets 0 and 4.  */
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+  if (nshifts >= 32)
+    {
+      /* Every surviving bit comes from SRC_HI; the high word is zero.  */
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_LSHIFTRT (SImode,
+                                                src_hi,
+                                                GEN_INT (nshifts - 32))));
+      emit_insn (gen_rtx_SET (dst_hi, const0_rtx));
+      DONE;
+    }
+
+  /* The remaining sequences may need a fresh pseudo register.  */
+  gcc_assert (can_create_pseudo_p ());
+
+  /* The expansions which follow are safe only if DST_LO and SRC_HI
+     do not overlap.  If they do, then fix by using a temporary register.
+     Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI
+     is set, SRC_LO is no longer live.  */
+  if (reg_overlap_mentioned_p (dst_lo, src_hi))
+    {
+      rtx new_src_hi = gen_reg_rtx (SImode);
+
+      emit_move_insn (new_src_hi, src_hi);
+      src_hi = new_src_hi;
+    }
+
+  if (nshifts == 1)
+    {
+      rtx_code_label *skip_hiset_label;
+      rtx j;
+
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx)));
+
+      /* Jump over the IOR below when bit 0 of SRC_HI is clear, i.e. when
+         no bit is carried from the high word into the low word.
+         The code generated by `genemit' would create a LABEL_REF.  */
+      skip_hiset_label = gen_label_rtx ();
+      j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+                                                  SImode,
+                                                  src_hi,
+                                                  GEN_INT (0),
+                                                  skip_hiset_label));
+      JUMP_LABEL (j) = skip_hiset_label;
+      LABEL_NUSES (skip_hiset_label)++;
+
+      /* Set bit 31 of DST_LO.  Build the constant with gen_int_mode so
+         that no signed `int' shift into the sign bit is needed.  */
+      emit_insn (gen_iorsi3 (dst_lo, dst_lo,
+                             gen_int_mode (HOST_WIDE_INT_1U << 31, SImode)));
+      emit_label (skip_hiset_label);
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx)));
+      DONE;
+    }
+
+  /* Here 2 <= nshifts <= 31.  The low NSHIFTS bits of SRC_HI are moved
+     into the top of DST_LO through a temporary; DST_HI is written last
+     so that SRC_HI stays intact until its final use.  */
+  rtx tmpval = gen_reg_rtx (SImode);
+
+  emit_insn (gen_rtx_SET (dst_lo,
+                          gen_rtx_LSHIFTRT (SImode,
+                                            src_lo,
+                                            GEN_INT (nshifts))));
+  emit_insn (gen_rtx_SET (tmpval,
+                          gen_rtx_ASHIFT (SImode,
+                                          src_hi,
+                                          GEN_INT (32 - nshifts))));
+  emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval));
+  emit_insn (gen_rtx_SET (dst_hi,
+                          gen_rtx_LSHIFTRT (SImode,
+                                            src_hi,
+                                            GEN_INT (nshifts))));
+  DONE;
+})
+
+; 64-bit ASHIFT with a constant shift count can be expanded into a
+; more efficient code sequence than the libgcc call required by
+; a variable shift in a register.
+
+(define_expand "ashldi3"
+  [(set (match_operand:DI 0 "register_operand")
+        (ashift:DI
+          (match_operand:DI 1 "register_operand")
+          (match_operand:QI 2 "const_int_operand")))]
+  ""
+{
+  /* Expand a DImode left shift by a constant count into SImode
+     operations on the low and high halves of the operands.  */
+  gcc_assert (CONST_INT_P (operands[2]));
+
+  const int nshifts = INTVAL (operands[2]);
+
+  /* Only counts from 1 to 63 can be split as done below.  A count of 64
+     or more would request an SImode shift by (nshifts - 32) >= 32, and a
+     count of zero or less would request one by (32 - nshifts) >= 32.
+     Decline those and leave them to the default (libgcc) handling.  */
+  if (nshifts < 1 || nshifts > 63)
+    FAIL;
+
+  /* SImode halves of the DImode operands, at byte offsets 0 and 4.  */
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+  if (nshifts >= 32)
+    {
+      /* Every surviving bit comes from SRC_LO; the low word is zero.  */
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_ASHIFT (SImode,
+                                              src_lo,
+                                              GEN_INT (nshifts - 32))));
+      emit_insn (gen_rtx_SET (dst_lo, const0_rtx));
+      DONE;
+    }
+
+  /* The remaining sequences may need a fresh pseudo register.  */
+  gcc_assert (can_create_pseudo_p ());
+
+  /* The expansions which follow are safe only if DST_HI and SRC_LO
+     do not overlap.  If they do, then fix by using a temporary register.
+     Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO
+     is set, SRC_HI is no longer live.  */
+  if (reg_overlap_mentioned_p (dst_hi, src_lo))
+    {
+      rtx new_src_lo = gen_reg_rtx (SImode);
+
+      emit_move_insn (new_src_lo, src_lo);
+      src_lo = new_src_lo;
+    }
+
+  if (nshifts == 1)
+    {
+      rtx_code_label *skip_hiset_label;
+      rtx j;
+
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_ASHIFT (SImode, src_hi, const1_rtx)));
+
+      /* Jump over the IOR below when bit 31 of SRC_LO is clear, i.e. when
+         no bit is carried from the low word into the high word.  */
+      skip_hiset_label = gen_label_rtx ();
+      j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+                                                  SImode,
+                                                  src_lo,
+                                                  GEN_INT (31),
+                                                  skip_hiset_label));
+      JUMP_LABEL (j) = skip_hiset_label;
+      LABEL_NUSES (skip_hiset_label)++;
+
+      /* Set bit 0 of DST_HI.  */
+      emit_insn (gen_iorsi3 (dst_hi, dst_hi, const1_rtx));
+      emit_label (skip_hiset_label);
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_ASHIFT (SImode, src_lo, const1_rtx)));
+      DONE;
+    }
+
+  /* Here 2 <= nshifts <= 31.  The top NSHIFTS bits of SRC_LO are moved
+     into the bottom of DST_HI through a temporary; DST_LO is written last
+     so that SRC_LO stays intact until its final use.  */
+  rtx tmpval = gen_reg_rtx (SImode);
+
+  emit_insn (gen_rtx_SET (dst_hi,
+                          gen_rtx_ASHIFT (SImode,
+                                          src_hi,
+                                          GEN_INT (nshifts))));
+  emit_insn (gen_rtx_SET (tmpval,
+                          gen_rtx_LSHIFTRT (SImode,
+                                            src_lo,
+                                            GEN_INT (32 - nshifts))));
+  emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval));
+  emit_insn (gen_rtx_SET (dst_lo,
+                          gen_rtx_ASHIFT (SImode,
+                                          src_lo,
+                                          GEN_INT (nshifts))));
+  DONE;
+})
;; Include ALU patterns with zero-extension of operands. That's where
;; the real insns are defined.
diff --git a/gcc/testsuite/gcc.target/pru/ashiftdi-1.c b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c
new file mode 100644
index 00000000000..516e5a86102
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c
@@ -0,0 +1,53 @@
+/* Functional test for DI left shift. */
+
+/* { dg-do run } */
+/* { dg-options "-pedantic-errors" } */
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern void abort (void);
+
+uint64_t __attribute__((noinline)) ashift_1 (uint64_t a)
+{ /* Return A shifted left by the constant 1; this count corresponds to the nshifts == 1 case of ashldi3 in pru.md.  */
+ return a << 1;
+}
+
+uint64_t __attribute__((noinline)) ashift_10 (uint64_t a)
+{ /* Return A shifted left by the constant 10; this count corresponds to the nshifts < 32 case of ashldi3 in pru.md.  */
+ return a << 10;
+}
+
+uint64_t __attribute__((noinline)) ashift_32 (uint64_t a)
+{ /* Return A shifted left by the constant 32, the smallest count covered by the nshifts >= 32 case of ashldi3 in pru.md.  */
+ return a << 32;
+}
+
+uint64_t __attribute__((noinline)) ashift_36 (uint64_t a)
+{ /* Return A shifted left by the constant 36; this count corresponds to the nshifts >= 32 case of ashldi3 in pru.md.  */
+ return a << 36;
+}
+
+int
+main (int argc, char** argv)
+{ /* Compare each helper with a precomputed 64-bit constant and abort on the first mismatch; argc and argv are not used.  */
+ if (ashift_1 (0xaaaa5555aaaa5555ull) != 0x5554aaab5554aaaaull)
+ abort();
+ if (ashift_10 (0xaaaa5555aaaa5555ull) != 0xa95556aaa9555400ull)
+ abort();
+ if (ashift_32 (0xaaaa5555aaaa5555ull) != 0xaaaa555500000000ull)
+ abort();
+ if (ashift_36 (0xaaaa5555aaaa5555ull) != 0xaaa5555000000000ull)
+ abort();
+
+ if (ashift_1 (0x1234567822334455ull) != 0x2468acf0446688aaull) /* Same four counts, second input value.  */
+ abort();
+ if (ashift_10 (0x1234567822334455ull) != 0xd159e088cd115400ull)
+ abort();
+ if (ashift_32 (0x1234567822334455ull) != 0x2233445500000000ull)
+ abort();
+ if (ashift_36 (0x1234567822334455ull) != 0x2334455000000000ull)
+ abort();
+
+ return 0; /* Reached only when none of the checks above aborted.  */
+}
diff --git a/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
new file mode 100644
index 00000000000..7adae6ccc13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
@@ -0,0 +1,53 @@
+/* Functional test for DI right shift. */
+
+/* { dg-do run } */
+/* { dg-options "-pedantic-errors" } */
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern void abort (void);
+
+uint64_t __attribute__((noinline)) lshift_1 (uint64_t a)
+{ /* Return A shifted right (unsigned) by the constant 1; this count corresponds to the nshifts == 1 case of lshrdi3 in pru.md.  */
+ return a >> 1;
+}
+
+uint64_t __attribute__((noinline)) lshift_10 (uint64_t a)
+{ /* Return A shifted right (unsigned) by the constant 10; this count corresponds to the nshifts < 32 case of lshrdi3 in pru.md.  */
+ return a >> 10;
+}
+
+uint64_t __attribute__((noinline)) lshift_32 (uint64_t a)
+{ /* Return A shifted right (unsigned) by the constant 32, the smallest count covered by the nshifts >= 32 case of lshrdi3 in pru.md.  */
+ return a >> 32;
+}
+
+uint64_t __attribute__((noinline)) lshift_36 (uint64_t a)
+{ /* Return A shifted right (unsigned) by the constant 36; this count corresponds to the nshifts >= 32 case of lshrdi3 in pru.md.  */
+ return a >> 36;
+}
+
+int
+main (int argc, char** argv)
+{ /* Compare each helper with a precomputed 64-bit constant and abort on the first mismatch; argc and argv are not used.  */
+ if (lshift_1 (0xaaaa5555aaaa5555ull) != 0x55552aaad5552aaaull)
+ abort();
+ if (lshift_10 (0xaaaa5555aaaa5555ull) != 0x002aaa95556aaa95ull)
+ abort();
+ if (lshift_32 (0xaaaa5555aaaa5555ull) != 0x00000000aaaa5555ull)
+ abort();
+ if (lshift_36 (0xaaaa5555aaaa5555ull) != 0x000000000aaaa555ull)
+ abort();
+
+ if (lshift_1 (0x1234567822334455ull) != 0x091a2b3c1119a22aull) /* Same four counts, second input value.  */
+ abort();
+ if (lshift_10 (0x1234567822334455ull) != 0x00048d159e088cd1ull)
+ abort();
+ if (lshift_32 (0x1234567822334455ull) != 0x0000000012345678ull)
+ abort();
+ if (lshift_36 (0x1234567822334455ull) != 0x0000000001234567ull)
+ abort();
+
+ return 0; /* Reached only when none of the checks above aborted.  */
+}
More information about the Gcc-cvs
mailing list