This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch,AVR] ad PR50931: Implement mulpsi3


This adds the missing 24-bit multiplication.

Besides implementing __mulpsi3 in libgcc, the patch adds some minor tweaks:
multiplication by small (signed 8-bit) numbers goes through __mulsqipsi3, and the
insns model the reduced register footprint of the asm implementations.

With this patch, PR50931 is complete on the target side.
(There is still a segfault in the front ends, though; see PR51527.)

Ok for trunk?

Johann

libgcc/
	PR target/50931
	* config/avr/t-avr (LIB1ASMSRC): Add _mulpsi3, _mulsqipsi3.
	* config/avr/lib1funcs.S (__mulpsi3, __mulsqipsi3): New functions.
gcc/
	PR target/50931
	* config/avr/avr.md (mulpsi3): New expander.
	(*umulqihipsi3, *umulhiqipsi3): New insns.
	(*mulsqipsi3.libgcc, *mulpsi3.libgcc): New insns.
	(mulsqipsi3, *mulpsi3): New insn-and-splits.
	(ashlpsi3): Turn to expander.  Move insn code to...
	(*ashlpsi3): ...this new insn.
testsuite/
	PR target/50931
	* gcc.target/avr/torture/int24-mul.c: New testcase.

Index: gcc/config/avr/avr.md
===================================================================
--- gcc/config/avr/avr.md	(revision 182277)
+++ gcc/config/avr/avr.md	(working copy)
@@ -2113,7 +2113,7 @@ (define_insn "*mulhi3_call"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
-;; To support widening multiplicatioon with constant we postpone
+;; To support widening multiplication with constant we postpone
 ;; expanding to the implicit library call until post combine and
 ;; prior to register allocation.  Clobber all hard registers that
 ;; might be used by the (widening) multiply until it is split and
@@ -2575,6 +2575,132 @@ (define_insn "*udivmodhi4_call"
    (set_attr "cc" "clobber")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; 24-bit multiply
+
+;; To support widening multiplication with constant we postpone
+;; expanding to the implicit library call until post combine and
+;; prior to register allocation.  Clobber all hard registers that
+;; might be used by the (widening) multiply until it is split and
+;; its final register footprint is worked out.
+
+(define_expand "mulpsi3"
+  [(parallel [(set (match_operand:PSI 0 "register_operand" "")
+                   (mult:PSI (match_operand:PSI 1 "register_operand" "")
+                             (match_operand:PSI 2 "nonmemory_operand" "")))
+              (clobber (reg:HI 26))
+              (clobber (reg:DI 18))])]
+  "AVR_HAVE_MUL"
+  {
+    if (s8_operand (operands[2], PSImode))
+      {
+        rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode));
+        emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1]));
+        DONE;
+      }
+  })
+
+(define_insn "*umulqihipsi3"
+  [(set (match_operand:PSI 0 "register_operand"                         "=&r")
+        (mult:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))
+                  (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))))]
+  "AVR_HAVE_MUL"
+  "mul %1,%A2
+	movw %A0,r0
+	mul %1,%B2
+	clr %C0
+	add %B0,r0
+	adc %C0,r1
+	clr __zero_reg__"
+  [(set_attr "length" "7")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*umulhiqipsi3"
+  [(set (match_operand:PSI 0 "register_operand"                         "=&r")
+        (mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))
+                  (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))]
+  "AVR_HAVE_MUL"
+  "mul %1,%A2
+	movw %A0,r0
+	mul %1,%B2
+	add %B0,r0
+	mov %C0,r1
+	clr __zero_reg__
+	adc %C0,__zero_reg__"
+  [(set_attr "length" "7")
+   (set_attr "cc" "clobber")])
+
+(define_insn_and_split "mulsqipsi3"
+  [(set (match_operand:PSI 0 "pseudo_register_operand"                          "=r")
+        (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r"))
+                  (match_operand:PSI 2 "pseudo_register_or_const_int_operand"    "rn")))
+   (clobber (reg:HI 26))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:QI 25)
+        (match_dup 1))
+   (set (reg:PSI 22)
+        (match_dup 2))
+   (set (reg:PSI 18)
+        (mult:PSI (sign_extend:PSI (reg:QI 25))
+                  (reg:PSI 22)))
+   (set (match_dup 0)
+        (reg:PSI 18))])
+
+(define_insn_and_split "*mulpsi3"
+  [(set (match_operand:PSI 0 "pseudo_register_operand"                       "=r")
+        (mult:PSI (match_operand:PSI 1 "pseudo_register_operand"              "r")
+                  (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn")))
+   (clobber (reg:HI 26))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:PSI 18)
+        (match_dup 1))
+   (set (reg:PSI 22) 
+        (match_dup 2))
+   (parallel [(set (reg:PSI 22)
+                   (mult:PSI (reg:PSI 22)
+                             (reg:PSI 18)))
+              (clobber (reg:QI 21))
+              (clobber (reg:QI 25))
+              (clobber (reg:HI 26))])
+   (set (match_dup 0)
+        (reg:PSI 22))]
+  {
+    if (s8_operand (operands[2], PSImode))
+      {
+        rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode));
+        emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1]));
+        DONE;
+      }
+  })
+
+(define_insn "*mulsqipsi3.libgcc"
+  [(set (reg:PSI 18)
+        (mult:PSI (sign_extend:PSI (reg:QI 25))
+                  (reg:PSI 22)))]
+  "AVR_HAVE_MUL"
+  "%~call __mulsqipsi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*mulpsi3.libgcc"
+  [(set (reg:PSI 22)
+        (mult:PSI (reg:PSI 22)
+                  (reg:PSI 18)))
+   (clobber (reg:QI 21))
+   (clobber (reg:QI 25))
+   (clobber (reg:HI 26))]
+  "AVR_HAVE_MUL"
+  "%~call __mulpsi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; 24-bit signed/unsigned division and modulo.
 ;; Notice that the libgcc implementation return the quotient in R22
 ;; and the remainder in R18 whereas the 32-bit [u]divmodsi4
@@ -3363,7 +3489,34 @@ (define_insn "*ashlsi3_const"
    (set_attr "adjust_len" "ashlsi")
    (set_attr "cc" "none,set_n,clobber,clobber")])
 
-(define_insn "ashlpsi3"
+(define_expand "ashlpsi3"
+  [(parallel [(set (match_operand:PSI 0 "register_operand"             "")
+                   (ashift:PSI (match_operand:PSI 1 "register_operand" "")
+                               (match_operand:QI 2 "nonmemory_operand" "")))
+              (clobber (scratch:QI))])]
+  ""
+  {
+    if (AVR_HAVE_MUL
+        && CONST_INT_P (operands[2]))
+      {
+        if (IN_RANGE (INTVAL (operands[2]), 3, 6))
+          {
+            rtx xoffset = force_reg (QImode, gen_int_mode (1 << INTVAL (operands[2]), QImode));
+            emit_insn (gen_mulsqipsi3 (operands[0], xoffset, operands[1])); 
+            DONE;
+          }
+        else if (optimize_insn_for_speed_p ()
+                 && INTVAL (operands[2]) != 16
+                 && IN_RANGE (INTVAL (operands[2]), 9, 22))
+          {
+            rtx xoffset = force_reg (PSImode, gen_int_mode (1 << INTVAL (operands[2]), PSImode));
+            emit_insn (gen_mulpsi3 (operands[0], operands[1], xoffset)); 
+            DONE;
+          }
+      }
+  })
+
+(define_insn "*ashlpsi3"
   [(set (match_operand:PSI 0 "register_operand"             "=r,r,r,r")
         (ashift:PSI (match_operand:PSI 1 "register_operand"  "0,0,r,0")
                     (match_operand:QI 2 "nonmemory_operand"  "r,P,O,n")))
Index: libgcc/config/avr/lib1funcs.S
===================================================================
--- libgcc/config/avr/lib1funcs.S	(revision 182277)
+++ libgcc/config/avr/lib1funcs.S	(working copy)
@@ -466,6 +466,153 @@ ENDF __mulsi3
 #endif /* __AVR_HAVE_MUL__ */
 
 /*******************************************************
+       Multiplication 24 x 24
+*******************************************************/
+
+#if defined (L_mulpsi3)
+
+;; A[0..2]: In: Multiplicand; Out: Product
+#define A0  22
+#define A1  A0+1
+#define A2  A0+2
+
+;; B[0..2]: In: Multiplier
+#define B0  18
+#define B1  B0+1
+#define B2  B0+2
+
+#if defined (__AVR_HAVE_MUL__)
+
+;; C[0..2]: Expand Result
+#define C0  22
+#define C1  C0+1
+#define C2  C0+2
+
+;; R24:R22 *= R20:R18
+;; Clobbers: r21, r25, r26, r27, __tmp_reg__
+
+#define AA0 26
+#define AA2 21
+
+DEFUN __mulpsi3
+    wmov    AA0, A0
+    mov     AA2, A2
+    XCALL   __umulhisi3
+    mul     AA2, B0     $  add  C2, r0
+    mul     AA0, B2     $  add  C2, r0
+    clr     __zero_reg__
+    ret
+ENDF __mulpsi3
+
+#undef AA2
+#undef AA0
+
+#undef C2
+#undef C1
+#undef C0
+
+#else /* !HAVE_MUL */
+
+;; C[0..2]: Expand Result
+#define C0  0
+#define C1  C0+1
+#define C2  21
+
+;; R24:R22 *= R20:R18
+;; Clobbers: __tmp_reg__, R18, R19, R20, R21
+
+DEFUN __mulpsi3
+
+    ;; C[] = 0
+    clr     __tmp_reg__
+    clr     C2
+    
+0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
+    LSR  B2     $  ror  B1     $  ror  B0
+    
+    ;; If the N-th Bit of B[] was set...
+    brcc    1f
+    
+    ;; ...then add A[] * 2^N to the Result C[]
+    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2
+    
+1:  ;; Multiply A[] by 2
+    LSL  A0     $  rol  A1     $  rol  A2
+    
+    ;; Loop until B[] is 0
+    subi B0,0   $  sbci B1,0   $  sbci B2,0
+    brne    0b
+    
+    ;; Copy C[] to the return Register A[]
+    wmov    A0, C0
+    mov     A2, C2
+
+    clr     __zero_reg__
+    ret
+ENDF __mulpsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#endif /* HAVE_MUL */
+
+#undef B2
+#undef B1
+#undef B0
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulpsi3 */
+
+#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
+
+;; A[0..2]: In: Multiplicand
+#define A0  22
+#define A1  A0+1
+#define A2  A0+2
+
+;; BB: In: Multiplier
+#define BB  25
+
+;; C[0..2]: Result
+#define C0  18
+#define C1  C0+1
+#define C2  C0+2
+
+;; C[] = A[] * sign_extend (BB)
+DEFUN __mulsqipsi3
+    mul     A0, BB
+    movw    C0, r0
+    mul     A2, BB
+    mov     C2, r0
+    mul     A1, BB
+    add     C1, r0
+    adc     C2, r1
+    clr     __zero_reg__
+    sbrs    BB, 7
+    ret
+    ;; BB < 0: sign_extend (BB) = BB - 0x100, hence subtract A[] << 8 from C[]
+    sub     C1, A0
+    sbc     C2, A1
+    ret
+ENDF __mulsqipsi3
+
+#undef C2
+#undef C1
+#undef C0
+
+#undef BB
+
+#undef A2
+#undef A1
+#undef A0
+
+#endif /* L_mulsqipsi3  &&  HAVE_MUL */
+
+/*******************************************************
        Multiplication 64 x 64
 *******************************************************/
 
Index: libgcc/config/avr/t-avr
===================================================================
--- libgcc/config/avr/t-avr	(revision 182277)
+++ libgcc/config/avr/t-avr	(working copy)
@@ -2,6 +2,7 @@ LIB1ASMSRC = avr/lib1funcs.S
 LIB1ASMFUNCS = \
 	_mulqi3 \
 	_mulhi3 \
+	_mulpsi3 _mulsqipsi3 \
 	_mulhisi3 \
 	_umulhisi3 \
 	_usmulhisi3 \
Index: gcc/testsuite/gcc.target/avr/torture/int24-mul.c
===================================================================
--- gcc/testsuite/gcc.target/avr/torture/int24-mul.c	(revision 0)
+++ gcc/testsuite/gcc.target/avr/torture/int24-mul.c	(revision 0)
@@ -0,0 +1,86 @@
+/* { dg-do run } */
+/* { dg-options "-w" } */
+
+#include <stdlib.h>
+
+const __pgm __int24 vals[] =
+  {
+    0, 1, 2, 3, -1, -2, -3, 0xff, 0x100, 0x101,
+    0xffL * 0xff, 0xfffL * 0xfff, 0x101010L, 0xaaaaaaL
+  };
+
+void test_u (void)
+{
+  unsigned int i;
+  unsigned long la, lb, lc;
+  __uint24 a, b, c;
+
+  int S = sizeof (vals) / sizeof (*vals);
+
+  for (i = 0; i < 500; i++)
+    {
+      if (i < S*S)
+        {
+          a = vals[i / S];
+          b = vals[i % S];
+        }
+      else
+        {
+          if (i & 1)
+            a += 0x7654321L;
+          else
+            b += 0x5fe453L;
+        }
+
+      c = a * b;
+
+      la = a;
+      lb = b;
+      lc = 0xffffff & (la * lb);
+      
+      if (c != lc)
+        abort();
+    }
+}
+
+#define TEST_N_U(A1,A2,B)                       \
+  do {                                          \
+    if ((0xffffff & (A1*B)) != A2*B)            \
+      abort();                                  \
+  } while (0)
+
+void test_nu (void)
+{
+  unsigned long la;
+  unsigned int i;
+  int S = sizeof (vals) / sizeof (*vals);
+  __uint24 a;
+  
+  for (i = 0; i < 500; i++)
+    {
+      a = i < S
+        ? vals[i % S]
+        : a + 0x7654321;
+
+      la = a;
+
+      TEST_N_U (la, a, 2);
+      TEST_N_U (la, a, 3);
+      TEST_N_U (la, a, 4);
+      TEST_N_U (la, a, 5);
+      TEST_N_U (la, a, 15);
+      TEST_N_U (la, a, 16);
+      TEST_N_U (la, a, 128);
+      TEST_N_U (la, a, 0x1000);
+    }
+}
+     
+int main (void)
+{
+  test_u();
+  test_nu();
+  
+  exit(0);
+    
+  return 0;
+}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]