This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch, AVR]: Fix PR49687: Better widening mul 16=8*8


For widening multiply there is room for optimization, e.g.:

* (mult:HI (extend:HI(QI)) HI) is better than
  (extend:HI(QI)) and (mult:HI HI HI)

* When multiplying by a power of 2, a mult is sometimes
  better than a shift left.

* Support MULSU instruction, i.e.
  (mult:HI (sign_extend:HI(QI))
           (zero_extend:HI(QI)))

* (mult:HI (HI small_const)) can be optimized.

Some insns are expanded in mulhi3 expander, others
are synthesized in combine and then split in split1.

This requires the function avr_gate_split1, which keeps
IRA/reload from recombining the insn.  This is needed to have
constants CSEd out; see the discussion in
   http://gcc.gnu.org/ml/gcc/2011-07/msg00136.html

I prefer this over clobbering regs (because then there is no CSE)
and over combine-split (because it is not clear that combine will
come up with a spare reg, and the spare reg's mode HI
is suboptimal).

FYI, I attached the output of a test case compiled with
-Os -dp for an ATmega8; the attachment marked ".0." is the
original output without the patch.

Some cases like the qmul8_xy test case are not optimized
(combine flaw), and there are superfluous move instructions
because of early-clobber (IRA/reload flaw).

Tested without regressions.

Ok to commit?

Johann

	PR target/49687
	* config/avr/avr.md (mulhi3): Use register_or_s8_u8_operand for
	operand2 and expand appropriately if there is a CONST_INT in
	operand2.
	(*mulsu,*mulus): New insns.
	(mulsqihi3): New insn.
	(muluqihi3): New insn.
	(*muluqihi3.uconst): New insn_and_split.
	(*muluqihi3.sconst): New insn_and_split.
	(*mulsqihi3.sconst): New insn_and_split.
	(*mulsqihi3.uconst): New insn_and_split.
	(*ashifthi3.signx.const): New insn_and_split.
	(*ashifthi3.signx.const7): New insn_and_split.
	(*ashifthi3.zerox.const): New insn_and_split.
	* config/avr/avr.c (avr_rtx_costs): Report costs of above insns.
	(avr_gate_split1): New function.
	* config/avr/avr-protos.h (avr_gate_split1): New prototype.
	* config/avr/predicates.md (const_2_to_7_operand): New.
	(const_2_to_6_operand): New.
	(u8_operand): New.
	(s8_operand): New.
	(register_or_s8_u8_operand): New.

Index: config/avr/predicates.md
===================================================================
--- config/avr/predicates.md	(revision 176136)
+++ config/avr/predicates.md	(working copy)
@@ -73,6 +73,16 @@ (define_predicate "const_0_to_7_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
 
+;; Return 1 if OP is constant integer 2..7 for MODE.
+(define_predicate "const_2_to_7_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 2, 7)")))
+
+;; Return 1 if OP is constant integer 2..6 for MODE.
+(define_predicate "const_2_to_6_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 2, 6)")))
+
 ;; Returns true if OP is either the constant zero or a register.
 (define_predicate "reg_or_0_operand"
   (ior (match_operand 0 "register_operand")
@@ -156,3 +166,17 @@ (define_predicate "const_8_16_24_operand
   (and (match_code "const_int")
        (match_test "8 == INTVAL(op) || 16 == INTVAL(op) || 24 == INTVAL(op)")))
 
+;; Unsigned CONST_INT that fits in 8 bits, i.e. 0..255.
+(define_predicate "u8_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 255)")))
+
+;; Signed CONST_INT that fits in 8 bits, i.e. -128..127.
+(define_predicate "s8_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -128, 127)")))
+
+(define_predicate "register_or_s8_u8_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_operand 0 "u8_operand")
+       (match_operand 0 "s8_operand")))
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 176136)
+++ config/avr/avr.md	(working copy)
@@ -1017,19 +1017,245 @@ (define_insn "umulqihi3"
   [(set_attr "length" "3")
    (set_attr "cc" "clobber")])
 
+(define_insn "*mulsu"
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+                 (zero_extend:HI (match_operand:QI 2 "register_operand" "a"))))]
+  "AVR_HAVE_MUL"
+  "mulsu %1,%2
+	movw %0,r0
+	clr __zero_reg__"
+  [(set_attr "length" "3")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*mulus"
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a"))
+                 (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))]
+  "AVR_HAVE_MUL"
+  "mulsu %2,%1
+	movw %0,r0
+	clr __zero_reg__"
+  [(set_attr "length" "3")
+   (set_attr "cc" "clobber")])
+
+;******************************************************************************
+; mul HI: $1 = sign/zero-extend, $2 = small constant
+;******************************************************************************
+
+(define_insn_and_split "*muluqihi3.uconst"
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+                 (match_operand:HI 2 "u8_operand"                       "M")))]
+  "AVR_HAVE_MUL
+   && avr_gate_split1()"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (match_dup 3)
+        (match_dup 2))
+   ; *mul
+   (set (match_dup 0)
+        (mult:HI (zero_extend:HI (match_dup 1))
+                 (zero_extend:HI (match_dup 3))))]
+  {
+    operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]),
+                                               QImode));
+    operands[3] = gen_reg_rtx (QImode);
+  })
+
+(define_insn_and_split "*muluqihi3.sconst"
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a"))
+                 (match_operand:HI 2 "s8_operand"                       "n")))]
+  "AVR_HAVE_MUL
+   && avr_gate_split1()"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (match_dup 3)
+        (match_dup 2))
+   ; *mulsu
+   (set (match_dup 0)
+        (mult:HI (sign_extend:HI (match_dup 3))
+                 (zero_extend:HI (match_dup 1))))]
+  {
+    operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]),
+                                               QImode));
+    operands[3] = gen_reg_rtx (QImode);
+  })
+
+(define_insn_and_split "*mulsqihi3.sconst"
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
+                 (match_operand:HI 2 "s8_operand"                       "n")))]
+  "AVR_HAVE_MUL
+   && avr_gate_split1()"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (match_dup 3)
+        (match_dup 2))
+   ; *muls
+   (set (match_dup 0)
+        (mult:HI (sign_extend:HI (match_dup 1))
+                 (sign_extend:HI (match_dup 3))))]
+  {
+    operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]),
+                                               QImode));
+    operands[3] = gen_reg_rtx (QImode);
+  })
+
+(define_insn_and_split "*mulsqihi3.uconst"
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+                 (match_operand:HI 2 "u8_operand"                       "M")))]
+  "AVR_HAVE_MUL
+   && avr_gate_split1()"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (match_dup 3)
+        (match_dup 2))
+   ; *mulsu
+   (set (match_dup 0)
+        (mult:HI (sign_extend:HI (match_dup 1))
+                 (zero_extend:HI (match_dup 3))))]
+  {
+    operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]),
+                                               QImode));
+    operands[3] = gen_reg_rtx (QImode);
+  })
+
+
+;; The EXTEND of $1 only appears in combine, we don't see it in expand so that
+;; expand decides to use ASHIFT instead of MUL at that time.  Fix that.
+
+(define_insn_and_split "*ashifthi3.signx.const"
+  [(set (match_operand:HI 0 "register_operand"                           "=r")
+        (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
+                   (match_operand:HI 2 "const_2_to_6_operand"             "I")))]
+  "AVR_HAVE_MUL
+   && avr_gate_split1()"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (match_dup 3)
+        (match_dup 2))
+   ; *muls
+   (set (match_dup 0)
+        (mult:HI (sign_extend:HI (match_dup 1))
+                 (sign_extend:HI (match_dup 3))))]
+  {
+    operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+    operands[3] = gen_reg_rtx (QImode);
+  })
+
+(define_insn_and_split "*ashifthi3.signx.const7"
+  [(set (match_operand:HI 0 "register_operand"                           "=r")
+        (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+                   (const_int 7)))]
+  "AVR_HAVE_MUL
+   && avr_gate_split1()"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (match_dup 3)
+        (match_dup 2))
+   ; *mulsu
+   (set (match_dup 0)
+        (mult:HI (sign_extend:HI (match_dup 1))
+                 (zero_extend:HI (match_dup 3))))]
+  {
+    operands[2] = GEN_INT (trunc_int_for_mode (1 << 7, QImode));
+    operands[3] = gen_reg_rtx (QImode);
+  })
+
+(define_insn_and_split "*ashifthi3.zerox.const"
+  [(set (match_operand:HI 0 "register_operand"                           "=r")
+        (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+                   (match_operand:HI 2 "const_2_to_7_operand"             "I")))]
+  "AVR_HAVE_MUL
+   && avr_gate_split1()"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (match_dup 3)
+        (match_dup 2))
+   ; *mul
+   (set (match_dup 0)
+        (mult:HI (zero_extend:HI (match_dup 1))
+                 (zero_extend:HI (match_dup 3))))]
+  {
+    operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]),
+                                               QImode));
+    operands[3] = gen_reg_rtx (QImode);
+  })
+
+;******************************************************************************
+; mul HI: $1 = sign/zero-extend, $2 = reg
+;******************************************************************************
+
+(define_insn "mulsqihi3"
+  [(set (match_operand:HI 0 "register_operand"                        "=&r")
+        (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+                 (match_operand:HI 2 "register_operand"                 "a")))]
+  "AVR_HAVE_MUL"
+  "mulsu %1,%A2
+	movw  %0,r0
+	mul   %1,%B2
+	add   %B0,r0
+	clr   __zero_reg__"
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
+(define_insn "muluqihi3"
+  [(set (match_operand:HI 0 "register_operand"                        "=&r")
+        (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+                 (match_operand:HI 2 "register_operand"                 "r")))]
+  "AVR_HAVE_MUL"
+  "mul  %1,%A2
+	movw %0,r0
+	mul  %1,%B2
+	add  %B0,r0
+	clr  __zero_reg__"
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
+;******************************************************************************
+
 (define_expand "mulhi3"
   [(set (match_operand:HI 0 "register_operand" "")
 	(mult:HI (match_operand:HI 1 "register_operand" "")
-		 (match_operand:HI 2 "register_operand" "")))]
+                 (match_operand:HI 2 "register_or_s8_u8_operand" "")))]
   ""
-  "
-{
-  if (!AVR_HAVE_MUL)
-    {
-      emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2]));
-      DONE;
-    }
-}")
+  {
+    if (!AVR_HAVE_MUL)
+      {
+        if (!register_operand (operands[2], HImode))
+          operands[2] = force_reg (HImode, operands[2]);
+
+        emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2]));
+        DONE;
+      }
+
+    /* For small constants we can do better by extending them on the fly.
+       The constant can be loaded in one instruction and the widening
+       multiplication is shorter.  First try the unsigned variant because it
+       allows constraint "d" instead of "a" for the signed version.  */
+
+    if (u8_operand (operands[2], HImode))
+      {
+        rtx x = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode));
+        emit_insn (gen_muluqihi3 (operands[0],
+                                  force_reg (QImode, x), operands[1]));
+        DONE;
+      } 
+
+    if (s8_operand (operands[2], HImode))
+      {
+        rtx x = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode));
+        emit_insn (gen_mulsqihi3 (operands[0],
+                                  force_reg (QImode, x), operands[1]));
+        DONE;
+      } 
+
+    if (!register_operand (operands[2], HImode))
+      operands[2] = force_reg (HImode, operands[2]);
+  })
 
 (define_insn "*mulhi3_enh"
   [(set (match_operand:HI 0 "register_operand" "=&r")
Index: config/avr/avr-protos.h
===================================================================
--- config/avr/avr-protos.h	(revision 176136)
+++ config/avr/avr-protos.h	(working copy)
@@ -117,3 +117,4 @@ extern int class_max_nregs (enum reg_cla
 #ifdef REAL_VALUE_TYPE
 extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n);
 #endif
+extern bool avr_gate_split1(void);
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 176141)
+++ config/avr/avr.c	(working copy)
@@ -5466,7 +5466,42 @@ avr_rtx_costs (rtx x, int codearg, int o
 
 	case HImode:
 	  if (AVR_HAVE_MUL)
-	    *total = COSTS_N_INSNS (!speed ? 7 : 10);
+            {
+              rtx op0 = XEXP (x, 0);
+              rtx op1 = XEXP (x, 1);
+              enum rtx_code code0 = GET_CODE (op0);
+              enum rtx_code code1 = GET_CODE (op1);
+              bool ex0 = SIGN_EXTEND == code0 || ZERO_EXTEND == code0;
+              bool ex1 = SIGN_EXTEND == code1 || ZERO_EXTEND == code1;
+
+              if (ex0
+                  && (u8_operand (op1, HImode)
+                      || s8_operand (op1, HImode)))
+                {
+                  *total = COSTS_N_INSNS (!speed ? 4 : 6);
+                  return true;
+                }
+              if (ex0
+                  && register_operand (op1, HImode))
+                {
+                  *total = COSTS_N_INSNS (!speed ? 5 : 8);
+                  return true;
+                }
+              else if (ex0 || ex1)
+                {
+                  *total = COSTS_N_INSNS (!speed ? 3 : 5);
+                  return true;
+                }
+              else if (register_operand (op0, HImode)
+                       && (u8_operand (op1, HImode)
+                           || s8_operand (op1, HImode)))
+                {
+                  *total = COSTS_N_INSNS (!speed ? 6 : 9);
+                  return true;
+                }
+              else
+                *total = COSTS_N_INSNS (!speed ? 7 : 10);
+            }
 	  else if (!speed)
 	    *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
 	  else
@@ -5549,6 +5584,17 @@ avr_rtx_costs (rtx x, int codearg, int o
 	  break;
 
 	case HImode:
+          if (AVR_HAVE_MUL)
+            {
+              if (const_2_to_7_operand (XEXP (x, 1), HImode)
+                  && (SIGN_EXTEND == GET_CODE (XEXP (x, 0))
+                      || ZERO_EXTEND == GET_CODE (XEXP (x, 0))))
+                {
+                  *total = COSTS_N_INSNS (!speed ? 4 : 6);
+                  return true;
+                }
+            }
+          
 	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
 	    {
 	      *total = COSTS_N_INSNS (!speed ? 5 : 41);
@@ -6881,4 +6927,29 @@ avr_expand_builtin (tree exp, rtx target
 }
 
 
+/* FIXME:  We compose some insns by means of insn combine
+      and split them in split1.  We don't want IRA/reload
+      to combine them to the original insns again because
+      that prevents some CSE optimizations if constants are
+      involved.  If IRA/reload combines, the recombined
+      insns get split again after reload, but then CSE
+      does not take place.
+         It appears that at present there is no other way
+      to take away the insn from IRA.  Notice that split1
+      runs unconditionally so that all our insns will get
+      split regardless of command line options.  */
+   
+#include "tree-pass.h"
+
+bool
+avr_gate_split1 (void)
+{
+  if (current_pass->static_pass_number
+      < pass_match_asm_constraints.pass.static_pass_number)
+    return true;
+
+  return false;
+}
+
+
 #include "gt-avr.h"
/* int times 16, a power of two.  Both attached listings implement
   this one as a 4-step 16-bit left-shift loop.  */
int mul16_16 (int x)
{
    return x * 16;
}

/* int times 17: a small constant (fits in 0..255) that is not a
   power of two.  */
int mul16_17 (int x)
{
    return x * 17;
}

/* int times 126: a small constant (fits in 0..255 and in -128..127)
   that is two less than a power of two.  */
int mul16_126 (int x)
{
    return x * 126;
}

/* char times 55, result widened to int.  Both attached listings use
   a single signed 8x8 multiply (muls) for this one.  */
int mul8_55 (char x)
{
    return x * 55;
}

/* char times 126, result widened to int.  126 still fits in the
   signed 8-bit range -128..127.  */
int mul8_126 (char x)
{
    return x * 126;
}

/* char times 155, result widened to int.  155 exceeds 127, so it fits
   in 8 bits only as an unsigned value (0..255).  */
int mul8_155 (char x)
{
    return x * 155;
}

/* char times 2, result widened to int.  Both attached listings
   sign-extend and then shift left once.  */
int mul8_2 (char x)
{
    return x * 2;
}

/* char times 4 (power of two), result widened to int.  */
int mul8_4 (char x)
{
    return x * 4;
}

/* unsigned char times 4 (power of two), result widened to int.  */
int umul8_4 (unsigned char x)
{
    return x * 4;
}

/* char times 3, result widened to int.  */
int mul8_3 (char x)
{
    return x * 3;
}

/* unsigned char times 3, result returned as unsigned int.  */
unsigned int umul8_3 (unsigned char x)
{
    return x * 3;
}


/* Global written by the tests below.  */
int y15;

/* Stores the constant 15 to y15 and also multiplies an int by 15, so
   the same constant value is needed once as a 16-bit store operand
   and once as a multiplier.  */
int imul8_15 (int x)
{
    y15 = 15;
    return x * 15;
}

/* y15 repeats the declaration made earlier in this file; x15 is new.  */
int y15;
int x15;

/* Two char-by-15 products stored to globals: the multiplier 15 is
   shared by both multiplications.  */
void cmul_15 (char x, char y)
{
    y15 = y * 15;
    x15 = x * 15;
}

/* Two int-by-15 products stored to globals: the multiplier 15 is
   shared by both multiplications.  */
void ymul8_15 (int x, int y)
{
    y15 = y * 15;
    x15 = x * 15;
}

/* One char factor c multiplied with two different ints.  The cover
   mail names this test case as one that is not optimized; both
   attached listings contain the same code for it.  */
void qmul8_xy (char c, int x, int y)
{
    y15 = y * c;
    x15 = x * c;
}


/* unsigned int times 5.
   NOTE: despite the "_4" in the name the multiplier is 5; the
   attached listings were generated from this body and agree with it.  */
unsigned int uimul8_4 (unsigned int x)
{
    return x * 5;
}

/* char times -155, result widened to int.  -155 lies outside both
   -128..127 and 0..255, so the constant does not fit in 8 bits.  */
int mul8_m155 (char x)
{
    return x * (-155);
}

/* char times -64 (negated power of two), result widened to int.
   -64 fits in the signed 8-bit range.  */
int mul8_m64 (char x)
{
    return x * (-64);
}

/* char times unsigned char with both factors variable.  The attached
   listings treat a as signed and b as unsigned.  */
int mul_s8u8 (char a, unsigned char b)
{
    return a*b;
}

	.file	"wmul.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
	.text
; mul16_16: r18 counts four passes of a 16-bit left shift of r25:r24,
; i.e. the value is shifted left by 4 in place.
.global	mul16_16
	.type	mul16_16, @function
mul16_16:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,4	 ;  26	*ashlhi3_const/5	[length = 5]
1:	lsl r24
	rol r25
	dec r18
	brne 1b
/* epilogue start */
	ret	 ;  24	return	[length = 1]
	.size	mul16_16, .-mul16_16
; mul16_17: built as (x << 4) + x.  r25:r24 is copied to r19:r18, the copy
; is shifted left four times (loop counter r20), the original r25:r24 is
; added, and the sum is moved back to r25:r24.
.global	mul16_17
	.type	mul16_17, @function
mul16_17:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	movw r18,r24	 ;  25	*movhi/1	[length = 1]
	ldi r20,4	 ;  30	*ashlhi3_const/5	[length = 5]
1:	lsl r18
	rol r19
	dec r20
	brne 1b
	add r18,r24	 ;  9	*addhi3/1	[length = 2]
	adc r19,r25
	movw r24,r18	 ;  31	*movhi/1	[length = 1]
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	mul16_17, .-mul16_17
; mul16_126: built as (x << 7) - (x << 1).  r21:r20 receives x << 1; the
; lsr/mov/clr/ror/ror sequence turns r25:r24 into x << 7; the final
; sub/sbc forms the 16-bit difference in r25:r24.
.global	mul16_126
	.type	mul16_126, @function
mul16_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	movw r20,r24	 ;  31	*movhi/1	[length = 1]
	lsl r20	 ;  38	*ashlhi3_const/2	[length = 2]
	rol r21
	lsr r25	 ;  39	*ashlhi3_const/5	[length = 5]
	mov r25,r24
	clr r24
	ror r25
	ror r24
	sub r24,r20	 ;  10	subhi3/1	[length = 2]
	sbc r25,r21
/* epilogue start */
	ret	 ;  35	return	[length = 1]
	.size	mul16_126, .-mul16_126
; mul8_55: loads 55 into r25, does one signed 8x8 multiply (muls), copies
; the 16-bit product from r1:r0 to r25:r24 and clears r1 again
; (r1 is __zero_reg__ in this listing).
.global	mul8_55
	.type	mul8_55, @function
mul8_55:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(55)	 ;  7	*movqi/2	[length = 1]
	muls r24,r25	 ;  8	mulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul8_55, .-mul8_55
; mul8_126: r24 is first sign-extended into r25:r24 (clr/sbrc/com), then
; the product is built as (x << 7) - (x << 1) with 16-bit shifts and a
; 16-bit subtract; no multiply instruction is used.
.global	mul8_126
	.type	mul8_126, @function
mul8_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r25	 ;  7	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	movw r18,r24	 ;  28	*movhi/1	[length = 1]
	lsl r18	 ;  33	*ashlhi3_const/2	[length = 2]
	rol r19
	lsr r25	 ;  34	*ashlhi3_const/5	[length = 5]
	mov r25,r24
	clr r24
	ror r25
	ror r24
	sub r24,r18	 ;  12	subhi3/1	[length = 2]
	sbc r25,r19
/* epilogue start */
	ret	 ;  31	return	[length = 1]
	.size	mul8_126, .-mul8_126
; mul8_155: r24 is sign-extended into r21:r20, 155 is loaded as a 16-bit
; constant into r19:r18, and the product is formed by the full 16x16
; sequence *mulhi3_enh (three mul instructions, two adds).
.global	mul8_155
	.type	mul8_155, @function
mul8_155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	mov r20,r24	 ;  6	extendqihi2/2	[length = 4]
	clr r21
	sbrc r20,7
	com r21
	ldi r18,lo8(155)	 ;  7	*movhi/4	[length = 2]
	ldi r19,hi8(155)
	mul r20,r18	 ;  8	*mulhi3_enh	[length = 7]
	movw r24,r0
	mul r20,r19
	add r25,r0
	mul r21,r18
	add r25,r0
	clr r1
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul8_155, .-mul8_155
; mul8_2: sign-extends r24 into r25:r24, then shifts the 16-bit value
; left by one.
.global	mul8_2
	.type	mul8_2, @function
mul8_2:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r25	 ;  7	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	lsl r24	 ;  28	*ashlhi3_const/2	[length = 2]
	rol r25
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul8_2, .-mul8_2
; mul8_4: sign-extends r24 into r25:r24, then shifts the 16-bit value
; left twice.
.global	mul8_4
	.type	mul8_4, @function
mul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r25	 ;  7	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	lsl r24	 ;  28	*ashlhi3_const/4	[length = 4]
	rol r25
	lsl r24
	rol r25
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul8_4, .-mul8_4
; umul8_4: zero-extends by loading 0 into r25, then shifts r25:r24 left
; twice.
.global	umul8_4
	.type	umul8_4, @function
umul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(0)	 ;  25	*movqi/1	[length = 1]
	lsl r24	 ;  30	*ashlhi3_const/4	[length = 4]
	rol r25
	lsl r24
	rol r25
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	umul8_4, .-umul8_4
; mul8_3: built as (x << 1) + x.  r24 is sign-extended into r19:r18, a
; copy in r25:r24 is shifted left once, and r19:r18 is added to it.
.global	mul8_3
	.type	mul8_3, @function
mul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	mov r18,r24	 ;  7	extendqihi2/2	[length = 4]
	clr r19
	sbrc r18,7
	com r19
	movw r24,r18	 ;  27	*movhi/1	[length = 1]
	lsl r24	 ;  32	*ashlhi3_const/2	[length = 2]
	rol r25
	add r24,r18	 ;  11	*addhi3/1	[length = 2]
	adc r25,r19
/* epilogue start */
	ret	 ;  30	return	[length = 1]
	.size	mul8_3, .-mul8_3
; umul8_3: built as (x << 1) + x.  r24 is zero-extended into r19:r18
; (mov plus ldi 0), a copy in r25:r24 is shifted left once, and r19:r18
; is added to it.
.global	umul8_3
	.type	umul8_3, @function
umul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	mov r18,r24	 ;  28	*movqi/1	[length = 1]
	ldi r19,lo8(0)	 ;  29	*movqi/1	[length = 1]
	movw r24,r18	 ;  27	*movhi/1	[length = 1]
	lsl r24	 ;  34	*ashlhi3_const/2	[length = 2]
	rol r25
	add r24,r18	 ;  11	*addhi3/1	[length = 2]
	adc r25,r19
/* epilogue start */
	ret	 ;  32	return	[length = 1]
	.size	umul8_3, .-umul8_3
; imul8_15: stores the 16-bit constant 15 to y15, then builds the product
; as (x << 4) - x: r25:r24 is copied to r19:r18, shifted left four times
; (loop counter r22), the original is subtracted, and the result is
; moved back to r25:r24.
.global	imul8_15
	.type	imul8_15, @function
imul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(15)	 ;  6	*movhi/4	[length = 2]
	ldi r19,hi8(15)
	sts y15+1,r19	 ;  7	*movhi/3	[length = 4]
	sts y15,r18
	movw r18,r24	 ;  27	*movhi/1	[length = 1]
	ldi r22,4	 ;  32	*ashlhi3_const/5	[length = 5]
1:	lsl r18
	rol r19
	dec r22
	brne 1b
	sub r18,r24	 ;  11	subhi3/1	[length = 2]
	sbc r19,r25
	movw r24,r18	 ;  33	*movhi/1	[length = 1]
/* epilogue start */
	ret	 ;  30	return	[length = 1]
	.size	imul8_15, .-imul8_15
; cmul_15: the same pattern twice.  Each 8-bit input (first r22, then r24)
; is sign-extended to 16 bits, (v << 4) - v is computed with a four-pass
; shift loop and a subtract, and the result is stored: the r22-based
; value to y15, the r24-based value to x15.
.global	cmul_15
	.type	cmul_15, @function
cmul_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r23	 ;  8	extendqihi2/1	[length = 3]
	sbrc r22,7
	com r23
	movw r18,r22	 ;  24	*movhi/1	[length = 1]
	ldi r30,4	 ;  30	*ashlhi3_const/5	[length = 5]
1:	lsl r18
	rol r19
	dec r30
	brne 1b
	sub r18,r22	 ;  12	subhi3/1	[length = 2]
	sbc r19,r23
	sts y15+1,r19	 ;  13	*movhi/3	[length = 4]
	sts y15,r18
	clr r25	 ;  15	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	movw r18,r24	 ;  25	*movhi/1	[length = 1]
	ldi r31,4	 ;  31	*ashlhi3_const/5	[length = 5]
1:	lsl r18
	rol r19
	dec r31
	brne 1b
	sub r18,r24	 ;  19	subhi3/1	[length = 2]
	sbc r19,r25
	sts x15+1,r19	 ;  20	*movhi/3	[length = 4]
	sts x15,r18
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	cmul_15, .-cmul_15
; ymul8_15: the same pattern twice on 16-bit inputs.  (v << 4) - v is
; computed from r23:r22 and stored to y15, then from r25:r24 and stored
; to x15; each shift uses its own four-pass loop (counters r26, r27).
.global	ymul8_15
	.type	ymul8_15, @function
ymul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	movw r18,r22	 ;  20	*movhi/1	[length = 1]
	ldi r26,4	 ;  26	*ashlhi3_const/5	[length = 5]
1:	lsl r18
	rol r19
	dec r26
	brne 1b
	sub r18,r22	 ;  10	subhi3/1	[length = 2]
	sbc r19,r23
	sts y15+1,r19	 ;  11	*movhi/3	[length = 4]
	sts y15,r18
	movw r18,r24	 ;  21	*movhi/1	[length = 1]
	ldi r27,4	 ;  27	*ashlhi3_const/5	[length = 5]
1:	lsl r18
	rol r19
	dec r27
	brne 1b
	sub r18,r24	 ;  15	subhi3/1	[length = 2]
	sbc r19,r25
	sts x15+1,r19	 ;  16	*movhi/3	[length = 4]
	sts x15,r18
/* epilogue start */
	ret	 ;  24	return	[length = 1]
	.size	ymul8_15, .-ymul8_15
; qmul8_xy: r24 is sign-extended into r25:r24 once, then two full 16x16
; multiplies (*mulhi3_enh) follow: r21:r20 times r25:r24 is stored to
; y15, r23:r22 times r25:r24 is stored to x15.
.global	qmul8_xy
	.type	qmul8_xy, @function
qmul8_xy:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r25	 ;  8	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	mul r20,r24	 ;  9	*mulhi3_enh	[length = 7]
	movw r18,r0
	mul r20,r25
	add r19,r0
	mul r21,r24
	add r19,r0
	clr r1
	sts y15+1,r19	 ;  10	*movhi/3	[length = 4]
	sts y15,r18
	mul r22,r24	 ;  11	*mulhi3_enh	[length = 7]
	movw r18,r0
	mul r22,r25
	add r19,r0
	mul r23,r24
	add r19,r0
	clr r1
	sts x15+1,r19	 ;  12	*movhi/3	[length = 4]
	sts x15,r18
/* epilogue start */
	ret	 ;  18	return	[length = 1]
	.size	qmul8_xy, .-qmul8_xy
; uimul8_4: built as (x << 2) + x, i.e. a multiplication by 5.  r25:r24
; is copied to r19:r18, the copy is shifted left twice, the original is
; added, and the sum is moved back to r25:r24.
.global	uimul8_4
	.type	uimul8_4, @function
uimul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	movw r18,r24	 ;  25	*movhi/1	[length = 1]
	lsl r18	 ;  30	*ashlhi3_const/4	[length = 4]
	rol r19
	lsl r18
	rol r19
	add r18,r24	 ;  9	*addhi3/1	[length = 2]
	adc r19,r25
	movw r24,r18	 ;  31	*movhi/1	[length = 1]
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	uimul8_4, .-uimul8_4
; mul8_m155: r24 is sign-extended into r21:r20, -155 is loaded as a
; 16-bit constant into r19:r18, and the product is formed by the full
; 16x16 sequence *mulhi3_enh.
.global	mul8_m155
	.type	mul8_m155, @function
mul8_m155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	mov r20,r24	 ;  6	extendqihi2/2	[length = 4]
	clr r21
	sbrc r20,7
	com r21
	ldi r18,lo8(-155)	 ;  7	*movhi/4	[length = 2]
	ldi r19,hi8(-155)
	mul r20,r18	 ;  8	*mulhi3_enh	[length = 7]
	movw r24,r0
	mul r20,r19
	add r25,r0
	mul r21,r18
	add r25,r0
	clr r1
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul8_m155, .-mul8_m155
; mul8_m64: built as (-x) << 6.  r24 is sign-extended into r25:r24, the
; 16-bit value is negated (com/neg/sbci), then shifted left six times
; (loop counter r21).
.global	mul8_m64
	.type	mul8_m64, @function
mul8_m64:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r25	 ;  7	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	com r25	 ;  9	neghi2/1	[length = 3]
	neg r24
	sbci r25,lo8(-1)
	ldi r21,6	 ;  31	*ashlhi3_const/5	[length = 5]
1:	lsl r24
	rol r25
	dec r21
	brne 1b
/* epilogue start */
	ret	 ;  29	return	[length = 1]
	.size	mul8_m64, .-mul8_m64
; mul_s8u8: r24 is sign-extended into r19:r18, r22 is zero-extended by
; loading 0 into r23, and the two 16-bit values are multiplied with the
; full 16x16 sequence *mulhi3_enh.
.global	mul_s8u8
	.type	mul_s8u8, @function
mul_s8u8:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	mov r18,r24	 ;  7	extendqihi2/2	[length = 4]
	clr r19
	sbrc r18,7
	com r19
	ldi r23,lo8(0)	 ;  26	*movqi/1	[length = 1]
	mul r18,r22	 ;  9	*mulhi3_enh	[length = 7]
	movw r24,r0
	mul r18,r23
	add r25,r0
	mul r19,r22
	add r25,r0
	clr r1
/* epilogue start */
	ret	 ;  29	return	[length = 1]
	.size	mul_s8u8, .-mul_s8u8
	.comm	x15,2,1
	.comm	y15,2,1
	.ident	"GCC: (GNU) 4.7.0 20110711 (experimental)"
.global __do_clear_bss
	.file	"wmul.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0
__zero_reg__ = 1
	.text
; mul16_16: r18 counts four passes of a 16-bit left shift of r25:r24,
; i.e. the value is shifted left by 4 in place.
.global	mul16_16
	.type	mul16_16, @function
mul16_16:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,4	 ;  26	*ashlhi3_const/5	[length = 5]
1:	lsl r24
	rol r25
	dec r18
	brne 1b
/* epilogue start */
	ret	 ;  24	return	[length = 1]
	.size	mul16_16, .-mul16_16
; mul16_17: loads 17 as an 8-bit constant into r18, copies r25:r24 to
; r21:r20 and uses muluqihi3: r18 times the low byte gives the 16-bit
; base product, r18 times the high byte is added into the result's high
; byte, then __zero_reg__ is cleared again.
.global	mul16_17
	.type	mul16_17, @function
mul16_17:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(17)	 ;  6	*movqi/2	[length = 1]
	movw r20,r24	 ;  23	*movhi/1	[length = 1]
	mul  r18,r20	 ;  7	muluqihi3	[length = 5]
	movw r24,r0
	mul  r18,r21
	add  r25,r0
	clr  __zero_reg__
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul16_17, .-mul16_17
; mul16_126: loads 126 as an 8-bit constant into r18, copies r25:r24 to
; r21:r20 and multiplies with muluqihi3 (two mul instructions and one
; add into the high byte).
.global	mul16_126
	.type	mul16_126, @function
mul16_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(126)	 ;  6	*movqi/2	[length = 1]
	movw r20,r24	 ;  23	*movhi/1	[length = 1]
	mul  r18,r20	 ;  7	muluqihi3	[length = 5]
	movw r24,r0
	mul  r18,r21
	add  r25,r0
	clr  __zero_reg__
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul16_126, .-mul16_126
; mul8_55: loads 55 into r25, does one signed 8x8 multiply (muls), copies
; the 16-bit product from r1:r0 to r25:r24 and clears r1 again
; (r1 is __zero_reg__ in this listing).
.global	mul8_55
	.type	mul8_55, @function
mul8_55:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(55)	 ;  23	*movqi/2	[length = 1]
	muls r24,r25	 ;  24	mulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	mul8_55, .-mul8_55
; mul8_126: loads 126 into r25 and does one signed 8x8 multiply (muls);
; the 16-bit product is copied from r1:r0 to r25:r24 and r1 is cleared.
.global	mul8_126
	.type	mul8_126, @function
mul8_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(126)	 ;  23	*movqi/2	[length = 1]
	muls r24,r25	 ;  24	mulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	mul8_126, .-mul8_126
; mul8_155: the constant is loaded as lo8(-101), which is the same 8-bit
; pattern as 155.  r24 is copied to r19 and one mulsu (signed r19 times
; unsigned r18, insn *mulsu) produces the 16-bit product.
.global	mul8_155
	.type	mul8_155, @function
mul8_155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(-101)	 ;  23	*movqi/2	[length = 1]
	mov r19,r24	 ;  26	*movqi/1	[length = 1]
	mulsu r19,r18	 ;  24	*mulsu	[length = 3]
	movw r24,r0
	clr __zero_reg__
/* epilogue start */
	ret	 ;  29	return	[length = 1]
	.size	mul8_155, .-mul8_155
; mul8_2: sign-extends r24 into r25:r24, then shifts the 16-bit value
; left by one; no multiply instruction is used here.
.global	mul8_2
	.type	mul8_2, @function
mul8_2:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r25	 ;  7	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	lsl r24	 ;  28	*ashlhi3_const/2	[length = 2]
	rol r25
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	mul8_2, .-mul8_2
; mul8_4: loads 4 into r25 and does one signed 8x8 multiply (muls)
; instead of extending and shifting.
.global	mul8_4
	.type	mul8_4, @function
mul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(4)	 ;  23	*movqi/2	[length = 1]
	muls r24,r25	 ;  24	mulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	mul8_4, .-mul8_4
; umul8_4: loads 4 into r25 and does one unsigned 8x8 multiply (mul,
; insn umulqihi3) instead of extending and shifting.
.global	umul8_4
	.type	umul8_4, @function
umul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(4)	 ;  23	*movqi/2	[length = 1]
	mul r24,r25	 ;  24	umulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	umul8_4, .-umul8_4
; mul8_3: loads 3 into r25 and does one signed 8x8 multiply (muls).
.global	mul8_3
	.type	mul8_3, @function
mul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(3)	 ;  23	*movqi/2	[length = 1]
	muls r24,r25	 ;  24	mulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	mul8_3, .-mul8_3
; umul8_3: loads 3 into r25 and does one unsigned 8x8 multiply (mul,
; insn umulqihi3).
.global	umul8_3
	.type	umul8_3, @function
umul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(3)	 ;  23	*movqi/2	[length = 1]
	mul r24,r25	 ;  24	umulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	umul8_3, .-umul8_3
; imul8_15: stores the 16-bit constant 15 (r19:r18) to y15, then reloads
; 15 as an 8-bit constant into r18, copies r25:r24 to r21:r20 and
; multiplies with muluqihi3.
.global	imul8_15
	.type	imul8_15, @function
imul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(15)	 ;  6	*movhi/4	[length = 2]
	ldi r19,hi8(15)
	sts y15+1,r19	 ;  7	*movhi/3	[length = 4]
	sts y15,r18
	ldi r18,lo8(15)	 ;  8	*movqi/2	[length = 1]
	movw r20,r24	 ;  25	*movhi/1	[length = 1]
	mul  r18,r20	 ;  9	muluqihi3	[length = 5]
	movw r24,r0
	mul  r18,r21
	add  r25,r0
	clr  __zero_reg__
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	imul8_15, .-imul8_15
; cmul_15: 15 is loaded into r25 once and used by both signed 8x8
; multiplies.  muls r22,r25 gives the value stored to y15; muls r24,r25
; gives the value stored to x15 (r25 is overwritten only by the second
; product).
.global	cmul_15
	.type	cmul_15, @function
cmul_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(15)	 ;  17	*movqi/2	[length = 1]
	muls r22,r25	 ;  18	mulqihi3	[length = 3]
	movw r22,r0
	clr r1
	sts y15+1,r23	 ;  10	*movhi/3	[length = 4]
	sts y15,r22
	muls r24,r25	 ;  20	mulqihi3	[length = 3]
	movw r24,r0
	clr r1
	sts x15+1,r25	 ;  14	*movhi/3	[length = 4]
	sts x15,r24
/* epilogue start */
	ret	 ;  24	return	[length = 1]
	.size	cmul_15, .-cmul_15
; ymul8_15: 15 is loaded into r18 once and used by two muluqihi3
; sequences.  r18 times r23:r22 is stored to y15; r25:r24 is then copied
; to r21:r20 and r18 times that copy is stored to x15.
.global	ymul8_15
	.type	ymul8_15, @function
ymul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(15)	 ;  7	*movqi/2	[length = 1]
	mul  r18,r22	 ;  8	muluqihi3	[length = 5]
	movw r20,r0
	mul  r18,r23
	add  r21,r0
	clr  __zero_reg__
	sts y15+1,r21	 ;  9	*movhi/3	[length = 4]
	sts y15,r20
	movw r20,r24	 ;  16	*movhi/1	[length = 1]
	mul  r18,r20	 ;  11	muluqihi3	[length = 5]
	movw r24,r0
	mul  r18,r21
	add  r25,r0
	clr  __zero_reg__
	sts x15+1,r25	 ;  12	*movhi/3	[length = 4]
	sts x15,r24
/* epilogue start */
	ret	 ;  19	return	[length = 1]
	.size	ymul8_15, .-ymul8_15
; qmul8_xy: r24 is sign-extended into r25:r24 once, then two full 16x16
; multiplies (*mulhi3_enh) follow: r21:r20 times r25:r24 is stored to
; y15, r23:r22 times r25:r24 is stored to x15.  No widening-multiply
; insn is used in this function.
.global	qmul8_xy
	.type	qmul8_xy, @function
qmul8_xy:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	clr r25	 ;  8	extendqihi2/1	[length = 3]
	sbrc r24,7
	com r25
	mul r20,r24	 ;  9	*mulhi3_enh	[length = 7]
	movw r18,r0
	mul r20,r25
	add r19,r0
	mul r21,r24
	add r19,r0
	clr r1
	sts y15+1,r19	 ;  10	*movhi/3	[length = 4]
	sts y15,r18
	mul r22,r24	 ;  11	*mulhi3_enh	[length = 7]
	movw r18,r0
	mul r22,r25
	add r19,r0
	mul r23,r24
	add r19,r0
	clr r1
	sts x15+1,r19	 ;  12	*movhi/3	[length = 4]
	sts x15,r18
/* epilogue start */
	ret	 ;  18	return	[length = 1]
	.size	qmul8_xy, .-qmul8_xy
; uimul8_4: loads 5 as an 8-bit constant into r18, copies r25:r24 to
; r21:r20 and multiplies with muluqihi3.
.global	uimul8_4
	.type	uimul8_4, @function
uimul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(5)	 ;  6	*movqi/2	[length = 1]
	movw r20,r24	 ;  23	*movhi/1	[length = 1]
	mul  r18,r20	 ;  7	muluqihi3	[length = 5]
	movw r24,r0
	mul  r18,r21
	add  r25,r0
	clr  __zero_reg__
/* epilogue start */
	ret	 ;  26	return	[length = 1]
	.size	uimul8_4, .-uimul8_4
; mul8_m155: -155 is loaded as a 16-bit constant into r19:r18 and r24 is
; copied to r20 without extending it.  mulsqihi3 then does mulsu of r20
; with the constant's low byte, plus mul of r20 with the high byte added
; into the result's high byte.
.global	mul8_m155
	.type	mul8_m155, @function
mul8_m155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r18,lo8(-155)	 ;  7	*movhi/4	[length = 2]
	ldi r19,hi8(-155)
	mov r20,r24	 ;  24	*movqi/1	[length = 1]
	mulsu r20,r18	 ;  8	mulsqihi3	[length = 5]
	movw  r24,r0
	mul   r20,r19
	add   r25,r0
	clr   __zero_reg__
/* epilogue start */
	ret	 ;  27	return	[length = 1]
	.size	mul8_m155, .-mul8_m155
; mul8_m64: loads -64 into r25 and does one signed 8x8 multiply (muls)
; instead of extending, negating and shifting.
.global	mul8_m64
	.type	mul8_m64, @function
mul8_m64:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	ldi r25,lo8(-64)	 ;  23	*movqi/2	[length = 1]
	muls r24,r25	 ;  24	mulqihi3	[length = 3]
	movw r24,r0
	clr r1
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	mul8_m64, .-mul8_m64
; mul_s8u8: copies r24 to r18 and does one mulsu (signed r18 times
; unsigned r22, insn *mulsu); neither input is extended to 16 bits first.
.global	mul_s8u8
	.type	mul_s8u8, @function
mul_s8u8:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
	mov r18,r24	 ;  25	*movqi/1	[length = 1]
	mulsu r18,r22	 ;  9	*mulsu	[length = 3]
	movw r24,r0
	clr __zero_reg__
/* epilogue start */
	ret	 ;  28	return	[length = 1]
	.size	mul_s8u8, .-mul_s8u8
	.comm	x15,2,1
	.comm	y15,2,1
	.ident	"GCC: (GNU) 4.7.0 20110711 (experimental)"
.global __do_clear_bss

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]