[gcc r14-6495] ARC: Add *extvsi_n_0 define_insn_and_split for PR 110717.

Roger Sayle sayle@gcc.gnu.org
Wed Dec 13 13:37:42 GMT 2023


https://gcc.gnu.org/g:ff8d0ce17fb585a29a83349acbc67b2dd3556629

commit r14-6495-gff8d0ce17fb585a29a83349acbc67b2dd3556629
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Wed Dec 13 13:36:44 2023 +0000

    ARC: Add *extvsi_n_0 define_insn_and_split for PR 110717.
    
    This patch improves the code generated for bitfield sign extensions on
    ARC cpus without a barrel shifter.
    
    Compiling the following test case:
    
    int foo(int x) { return (x<<27)>>27; }
    
    with -O2 -mcpu=em, generates two loops:
    
    foo:    mov     lp_count,27
            lp      2f
            add     r0,r0,r0
            nop
    2:      # end single insn loop
            mov     lp_count,27
            lp      2f
            asr     r0,r0
            nop
    2:      # end single insn loop
            j_s     [blink]
    
    and the closely related test case:
    
    struct S { int a : 5; };
    int bar (struct S *p) { return p->a; }
    
    generates the slightly better:
    
    bar:    ldb_s   r0,[r0]
            mov_s   r2,0    ;3
            add3    r0,r2,r0
            sexb_s  r0,r0
            asr_s   r0,r0
            asr_s   r0,r0
            j_s.d   [blink]
            asr_s   r0,r0
    
    which uses 6 instructions to perform this particular sign extension.
    It turns out that sign extensions can always be implemented using at
    most three instructions on ARC (without a barrel shifter) using the
    idiom ((x&mask)^msb)-msb [as described in section "2-5 Sign Extension"
    of Henry Warren's book "Hacker's Delight"].  Using this, the sign
    extensions above on ARC's EM both become:
    
            bmsk_s  r0,r0,4
            xor     r0,r0,16
            sub     r0,r0,16
    
    which takes about 3 cycles, compared to the ~112 cycles for the loops
    in foo.
    
    2023-12-13  Roger Sayle  <roger@nextmovesoftware.com>
                Jeff Law  <jlaw@ventanamicro.com>
    
    gcc/ChangeLog
            * config/arc/arc.md (*extvsi_n_0): New define_insn_and_split to
            implement SImode sign extract using an AND, XOR and MINUS sequence.
    
    gcc/testsuite/ChangeLog
            * gcc.target/arc/extvsi-1.c: New test case.
            * gcc.target/arc/extvsi-2.c: Likewise.

Diff:
---
 gcc/config/arc/arc.md                   | 20 ++++++++++++++++++++
 gcc/testsuite/gcc.target/arc/extvsi-1.c | 15 +++++++++++++++
 gcc/testsuite/gcc.target/arc/extvsi-2.c | 12 ++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index bf9f88eff04..d980876eff8 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -6127,6 +6127,26 @@ archs4x, archs4xd"
   ""
   [(set_attr "length" "8")])
 
+(define_insn_and_split "*extvsi_n_0"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(sign_extract:SI (match_operand:SI 1 "register_operand" "0")
+			 (match_operand:QI 2 "const_int_operand")
+			 (const_int 0)))]
+  "!TARGET_BARREL_SHIFTER
+   && IN_RANGE (INTVAL (operands[2]), 2,
+		(optimize_insn_for_size_p () ? 28 : 30))"
+  "#"
+  "&& 1"
+[(set (match_dup 0) (and:SI (match_dup 0) (match_dup 3)))
+ (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 4)))
+ (set (match_dup 0) (minus:SI (match_dup 0) (match_dup 4)))]
+{
+  int tmp = INTVAL (operands[2]);
+  operands[3] = GEN_INT (~(HOST_WIDE_INT_M1U << tmp));
+  operands[4] = GEN_INT (HOST_WIDE_INT_1U << (tmp - 1));
+}
+  [(set_attr "length" "14")])
+
 (define_insn_and_split "rotlsi3_cnt1"
   [(set (match_operand:SI 0 "dest_reg_operand"            "=r")
 	(rotate:SI (match_operand:SI 1 "register_operand" "r")
diff --git a/gcc/testsuite/gcc.target/arc/extvsi-1.c b/gcc/testsuite/gcc.target/arc/extvsi-1.c
new file mode 100644
index 00000000000..5ac6feafae3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/extvsi-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em" } */
+struct S { int a : 5; };
+
+int foo (struct S *p)
+{
+  return p->a;
+}
+
+/* { dg-final { scan-assembler "msk_s\\s+r0,r0,4" } } */
+/* { dg-final { scan-assembler "xor\\s+r0,r0,16" } } */
+/* { dg-final { scan-assembler "sub\\s+r0,r0,16" } } */
+/* { dg-final { scan-assembler-not "add3\\s+r0,r2,r0" } } */
+/* { dg-final { scan-assembler-not "sext_s\\s+r0,r0" } } */
+/* { dg-final { scan-assembler-not "asr_s\\s+r0,r0" } } */
diff --git a/gcc/testsuite/gcc.target/arc/extvsi-2.c b/gcc/testsuite/gcc.target/arc/extvsi-2.c
new file mode 100644
index 00000000000..953ea6a8b24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/extvsi-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=em" } */
+
+int foo(int x)
+{
+  return (x<<27)>>27;
+}
+
+/* { dg-final { scan-assembler "msk_s\\s+r0,r0,4" } } */
+/* { dg-final { scan-assembler "xor\\s+r0,r0,16" } } */
+/* { dg-final { scan-assembler "sub\\s+r0,r0,16" } } */
+/* { dg-final { scan-assembler-not "lp\\s+2f" } } */


More information about the Gcc-cvs mailing list