This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch, AVR]: Fix PR49687: Better widening mul 16=8*8
- From: Georg-Johann Lay <avr at gjlay dot de>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Anatoly Sokolov <aesok at post dot ru>, Denis Chertykov <chertykov at gmail dot com>, Eric Weddington <eric dot weddington at atmel dot com>
- Date: Tue, 12 Jul 2011 12:35:48 +0200
- Subject: [Patch, AVR]: Fix PR49687: Better widening mul 16=8*8
For widening multiply there is room for optimization, e.g.:
* (mult:HI (extend:HI(QI)) HI) is better than
(extend:HI(QI)) and (mult:HI HI HI)
* For multiplication by a power of 2, a mult is
sometimes better than a left shift.
* Support MULSU instruction, i.e.
(mult:HI (sign_extend:HI(QI))
(zero_extend:HI(QI)))
* (mult:HI (HI small_const)) can be optimized.
Some insns are expanded in the mulhi3 expander, others
are synthesized in combine and then split in split1.
This requires the function avr_gate_split1 to prevent
IRA/reload from recombining the insns. This is needed to have
constants CSEd out, see the discussion in
http://gcc.gnu.org/ml/gcc/2011-07/msg00136.html
I prefer this over clobber regs (because no CSE) and over
combine-split (because it is not clear that combine will
come up with a spare reg and the mode of the spare reg HI
is suboptimal).
FYI, I attached the output of a test case compiled with
-Os -dp for an ATmega8, with .0. being the original output
without the patch.
Some cases like the qmul8_xy test case are not optimized
(combine flaw), and there are superfluous move instructions
because of early-clobber (IRA/reload flaw).
Tested without regressions.
Ok to commit?
Johann
PR target/49687
* config/avr/avr.md (mulhi3): Use register_or_s8_u8_operand for
operand2 and expand appropriately if there is a CONST_INT in
operand2.
(*mulsu,*mulus): New insns.
(mulsqihi3): New insn.
(muluqihi3): New insn.
(*muluqihi3.uconst): New insn_and_split.
(*muluqihi3.sconst): New insn_and_split.
(*mulsqihi3.sconst): New insn_and_split.
(*mulsqihi3.uconst): New insn_and_split.
(*ashifthi3.signx.const): New insn_and_split.
(*ashifthi3.signx.const7): New insn_and_split.
(*ashifthi3.zerox.const): New insn_and_split.
* config/avr/avr.c (avr_rtx_costs): Report costs of above insns.
(avr_gate_split1): New function.
* config/avr/avr-protos.h (avr_gate_split1): New prototype.
* config/avr/predicates.md (const_2_to_7_operand): New.
(const_2_to_6_operand): New.
(u8_operand): New.
(s8_operand): New.
(register_or_s8_u8_operand): New.
Index: config/avr/predicates.md
===================================================================
--- config/avr/predicates.md (revision 176136)
+++ config/avr/predicates.md (working copy)
@@ -73,6 +73,16 @@ (define_predicate "const_0_to_7_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+;; True if OP is a CONST_INT in the inclusive range 2..7 (the mode is not tested).
+(define_predicate "const_2_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 2, 7)")))
+
+;; True if OP is a CONST_INT in the inclusive range 2..6 (the mode is not tested).
+(define_predicate "const_2_to_6_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 2, 6)")))
+
;; Returns true if OP is either the constant zero or a register.
(define_predicate "reg_or_0_operand"
(ior (match_operand 0 "register_operand")
@@ -156,3 +166,17 @@ (define_predicate "const_8_16_24_operand
(and (match_code "const_int")
(match_test "8 == INTVAL(op) || 16 == INTVAL(op) || 24 == INTVAL(op)")))
+;; True if OP is a CONST_INT in 0..255, i.e. its value fits in 8 unsigned bits.
+(define_predicate "u8_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 255)")))
+
+;; True if OP is a CONST_INT in -128..127, i.e. its value fits in 8 signed bits.
+(define_predicate "s8_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -128, 127)")))
+
+(define_predicate "register_or_s8_u8_operand" ; a register, or a CONST_INT in -128..255
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "u8_operand") ; 0..255
+ (match_operand 0 "s8_operand"))) ; -128..127
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md (revision 176136)
+++ config/avr/avr.md (working copy)
@@ -1017,19 +1017,245 @@ (define_insn "umulqihi3"
[(set_attr "length" "3")
(set_attr "cc" "clobber")])
+(define_insn "*mulsu" ; HI $0 = sign_extend (QI $1) * zero_extend (QI $2)
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+ (zero_extend:HI (match_operand:QI 2 "register_operand" "a"))))]
+ "AVR_HAVE_MUL" ; the template copies the product from r0 upwards into $0 and then clears __zero_reg__ again
+ "mulsu %1,%2
+ movw %0,r0
+ clr __zero_reg__"
+ [(set_attr "length" "3") ; the three instructions of the template
+ (set_attr "cc" "clobber")])
+
+(define_insn "*mulus" ; HI $0 = zero_extend (QI $1) * sign_extend (QI $2)
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a"))
+ (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))]
+ "AVR_HAVE_MUL" ; mirror image of "*mulsu": the sign-extended $2 is printed as the first MULSU operand
+ "mulsu %2,%1
+ movw %0,r0
+ clr __zero_reg__"
+ [(set_attr "length" "3") ; the three instructions of the template
+ (set_attr "cc" "clobber")])
+
+;******************************************************************************
+; mul HI: $1 = sign/zero-extend, $2 = small constant
+;******************************************************************************
+
+(define_insn_and_split "*muluqihi3.uconst" ; HI $0 = zero_extend (QI $1) * constant in 0..255
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:HI 2 "u8_operand" "M")))]
+ "AVR_HAVE_MUL
+ && avr_gate_split1()"
+ { gcc_unreachable(); } ; never printed: the split condition "&& 1" below holds whenever the insn matches
+ "&& 1"
+ [(set (match_dup 3) ; load the constant into the new QI register $3
+ (match_dup 2))
+ ; unsigned * unsigned widening multiply (MUL), cf. insn "umulqihi3"
+ (set (match_dup 0)
+ (mult:HI (zero_extend:HI (match_dup 1))
+ (zero_extend:HI (match_dup 3))))]
+ {
+ operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), /* re-express the value as a QImode constant */
+ QImode));
+ operands[3] = gen_reg_rtx (QImode); /* fresh pseudo that receives the constant */
+ })
+
+(define_insn_and_split "*muluqihi3.sconst" ; HI $0 = zero_extend (QI $1) * constant in -128..127
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a"))
+ (match_operand:HI 2 "s8_operand" "n")))]
+ "AVR_HAVE_MUL
+ && avr_gate_split1()"
+ { gcc_unreachable(); } ; never printed: the split condition "&& 1" below holds whenever the insn matches
+ "&& 1"
+ [(set (match_dup 3) ; load the constant into the new QI register $3
+ (match_dup 2))
+ ; signed constant * unsigned $1, i.e. the shape of insn "*mulsu"
+ (set (match_dup 0)
+ (mult:HI (sign_extend:HI (match_dup 3))
+ (zero_extend:HI (match_dup 1))))]
+ {
+ operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), /* re-express the value as a QImode constant */
+ QImode));
+ operands[3] = gen_reg_rtx (QImode); /* fresh pseudo that receives the constant */
+ })
+
+(define_insn_and_split "*mulsqihi3.sconst" ; HI $0 = sign_extend (QI $1) * constant in -128..127
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
+ (match_operand:HI 2 "s8_operand" "n")))]
+ "AVR_HAVE_MUL
+ && avr_gate_split1()"
+ { gcc_unreachable(); } ; never printed: the split condition "&& 1" below holds whenever the insn matches
+ "&& 1"
+ [(set (match_dup 3) ; load the constant into the new QI register $3
+ (match_dup 2))
+ ; signed * signed widening multiply (MULS), cf. insn "mulqihi3"
+ (set (match_dup 0)
+ (mult:HI (sign_extend:HI (match_dup 1))
+ (sign_extend:HI (match_dup 3))))]
+ {
+ operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), /* re-express the value as a QImode constant */
+ QImode));
+ operands[3] = gen_reg_rtx (QImode); /* fresh pseudo that receives the constant */
+ })
+
+(define_insn_and_split "*mulsqihi3.uconst" ; HI $0 = sign_extend (QI $1) * constant in 0..255
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+ (match_operand:HI 2 "u8_operand" "M")))]
+ "AVR_HAVE_MUL
+ && avr_gate_split1()"
+ { gcc_unreachable(); } ; never printed: the split condition "&& 1" below holds whenever the insn matches
+ "&& 1"
+ [(set (match_dup 3) ; load the constant into the new QI register $3
+ (match_dup 2))
+ ; signed $1 * unsigned constant, i.e. the shape of insn "*mulsu"
+ (set (match_dup 0)
+ (mult:HI (sign_extend:HI (match_dup 1))
+ (zero_extend:HI (match_dup 3))))]
+ {
+ operands[2] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), /* e.g. 155 becomes the QImode constant -101 */
+ QImode));
+ operands[3] = gen_reg_rtx (QImode); /* fresh pseudo that receives the constant */
+ })
+
+
+;; The EXTEND of $1 only appears in combine; expand does not see it and
+;; therefore decides to use ASHIFT instead of MUL at that time. Fix that.
+
+(define_insn_and_split "*ashifthi3.signx.const" ; HI $0 = sign_extend (QI $1) << 2..6
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d"))
+ (match_operand:HI 2 "const_2_to_6_operand" "I")))] ; shift counts 2..6, i.e. factors 4..64
+ "AVR_HAVE_MUL
+ && avr_gate_split1()"
+ { gcc_unreachable(); } ; never printed: the split condition "&& 1" below holds whenever the insn matches
+ "&& 1"
+ [(set (match_dup 3) ; load the factor into the new QI register $3
+ (match_dup 2))
+ ; signed * signed widening multiply (MULS), cf. insn "mulqihi3"
+ (set (match_dup 0)
+ (mult:HI (sign_extend:HI (match_dup 1))
+ (sign_extend:HI (match_dup 3))))]
+ {
+ operands[2] = GEN_INT (1 << INTVAL (operands[2])); /* shift count -> factor 4..64 */
+ operands[3] = gen_reg_rtx (QImode); /* fresh pseudo that receives the factor */
+ })
+
+(define_insn_and_split "*ashifthi3.signx.const7" ; HI $0 = sign_extend (QI $1) << 7
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+ (const_int 7)))] ; factor 128 is outside -128..127, so the split zero-extends it ("*mulsu" shape)
+ "AVR_HAVE_MUL
+ && avr_gate_split1()"
+ { gcc_unreachable(); } ; never printed: the split condition "&& 1" below holds whenever the insn matches
+ "&& 1"
+ [(set (match_dup 3)
+ (match_dup 2)) ; $2 is not matched by the pattern above; the code below creates it
+ ; signed $1 * unsigned factor, i.e. the shape of insn "*mulsu"
+ (set (match_dup 0)
+ (mult:HI (sign_extend:HI (match_dup 1))
+ (zero_extend:HI (match_dup 3))))]
+ {
+ operands[2] = GEN_INT (trunc_int_for_mode (1 << 7, QImode)); /* 128 expressed as a QImode constant */
+ operands[3] = gen_reg_rtx (QImode); /* fresh pseudo that receives the factor */
+ })
+
+(define_insn_and_split "*ashifthi3.zerox.const" ; HI $0 = zero_extend (QI $1) << 2..7
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:HI 2 "const_2_to_7_operand" "I")))] ; shift counts 2..7, i.e. factors 4..128
+ "AVR_HAVE_MUL
+ && avr_gate_split1()"
+ { gcc_unreachable(); } ; never printed: the split condition "&& 1" below holds whenever the insn matches
+ "&& 1"
+ [(set (match_dup 3) ; load the factor into the new QI register $3
+ (match_dup 2))
+ ; unsigned * unsigned widening multiply (MUL), cf. insn "umulqihi3"
+ (set (match_dup 0)
+ (mult:HI (zero_extend:HI (match_dup 1))
+ (zero_extend:HI (match_dup 3))))]
+ {
+ operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), /* shift count -> factor as a QImode constant */
+ QImode));
+ operands[3] = gen_reg_rtx (QImode); /* fresh pseudo that receives the factor */
+ })
+
+;******************************************************************************
+; mul HI: $1 = sign/zero-extend, $2 = reg
+;******************************************************************************
+
+(define_insn "mulsqihi3" ; HI $0 = sign_extend (QI $1) * HI $2
+ [(set (match_operand:HI 0 "register_operand" "=&r") ; early-clobber: $0 is written by MOVW before $1 and %B2 are read again
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a"))
+ (match_operand:HI 2 "register_operand" "a")))]
+ "AVR_HAVE_MUL" ; template: $1 * low byte of $2 gives the 16-bit base; the low byte of $1 * high byte of $2 is added to %B0
+ "mulsu %1,%A2
+ movw %0,r0
+ mul %1,%B2
+ add %B0,r0
+ clr __zero_reg__"
+ [(set_attr "length" "5") ; the five instructions of the template
+ (set_attr "cc" "clobber")])
+
+(define_insn "muluqihi3" ; HI $0 = zero_extend (QI $1) * HI $2
+ [(set (match_operand:HI 0 "register_operand" "=&r") ; early-clobber: $0 is written by MOVW before $1 and %B2 are read again
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:HI 2 "register_operand" "r")))]
+ "AVR_HAVE_MUL" ; template: $1 * low byte of $2 gives the 16-bit base; the low byte of $1 * high byte of $2 is added to %B0
+ "mul %1,%A2
+ movw %0,r0
+ mul %1,%B2
+ add %B0,r0
+ clr __zero_reg__"
+ [(set_attr "length" "5") ; the five instructions of the template
+ (set_attr "cc" "clobber")])
+
+;******************************************************************************
+
(define_expand "mulhi3"
[(set (match_operand:HI 0 "register_operand" "")
(mult:HI (match_operand:HI 1 "register_operand" "")
- (match_operand:HI 2 "register_operand" "")))]
+ (match_operand:HI 2 "register_or_s8_u8_operand" "")))]
""
- "
-{
- if (!AVR_HAVE_MUL)
- {
- emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2]));
- DONE;
- }
-}")
+ {
+ if (!AVR_HAVE_MUL)
+ {
+ if (!register_operand (operands[2], HImode)) /* the predicate now also admits constants */
+ operands[2] = force_reg (HImode, operands[2]);
+
+ emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+
+ /* For small constants we can do better by extending them on the fly.
+ The constant can be loaded in one instruction and the widening
+ multiplication is shorter. First try the unsigned variant because
+ muluqihi3 uses constraint "r" where mulsqihi3 uses "a". */
+
+ if (u8_operand (operands[2], HImode))
+ {
+ rtx x = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode));
+ emit_insn (gen_muluqihi3 (operands[0], /* the constant becomes the QI operand, operands[1] the HI operand */
+ force_reg (QImode, x), operands[1]));
+ DONE;
+ }
+
+ if (s8_operand (operands[2], HImode)) /* reached only for values not accepted by u8_operand above */
+ {
+ rtx x = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode));
+ emit_insn (gen_mulsqihi3 (operands[0], /* the constant becomes the QI operand, operands[1] the HI operand */
+ force_reg (QImode, x), operands[1]));
+ DONE;
+ }
+
+ if (!register_operand (operands[2], HImode))
+ operands[2] = force_reg (HImode, operands[2]);
+ })
(define_insn "*mulhi3_enh"
[(set (match_operand:HI 0 "register_operand" "=&r")
Index: config/avr/avr-protos.h
===================================================================
--- config/avr/avr-protos.h (revision 176136)
+++ config/avr/avr-protos.h (working copy)
@@ -117,3 +117,4 @@ extern int class_max_nregs (enum reg_cla
#ifdef REAL_VALUE_TYPE
extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n);
#endif
+extern bool avr_gate_split1(void);
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c (revision 176141)
+++ config/avr/avr.c (working copy)
@@ -5466,7 +5466,42 @@ avr_rtx_costs (rtx x, int codearg, int o
case HImode:
if (AVR_HAVE_MUL)
- *total = COSTS_N_INSNS (!speed ? 7 : 10);
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ enum rtx_code code0 = GET_CODE (op0);
+ enum rtx_code code1 = GET_CODE (op1);
+ bool ex0 = SIGN_EXTEND == code0 || ZERO_EXTEND == code0;
+ bool ex1 = SIGN_EXTEND == code1 || ZERO_EXTEND == code1;
+
+ if (ex0
+ && (u8_operand (op1, HImode)
+ || s8_operand (op1, HImode)))
+ {
+ *total = COSTS_N_INSNS (!speed ? 4 : 6);
+ return true;
+ }
+ if (ex0
+ && register_operand (op1, HImode))
+ {
+ *total = COSTS_N_INSNS (!speed ? 5 : 8);
+ return true;
+ }
+ else if (ex0 || ex1)
+ {
+ *total = COSTS_N_INSNS (!speed ? 3 : 5);
+ return true;
+ }
+ else if (register_operand (op0, HImode)
+ && (u8_operand (op1, HImode)
+ || s8_operand (op1, HImode)))
+ {
+ *total = COSTS_N_INSNS (!speed ? 6 : 9);
+ return true;
+ }
+ else
+ *total = COSTS_N_INSNS (!speed ? 7 : 10);
+ }
else if (!speed)
*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
@@ -5549,6 +5584,17 @@ avr_rtx_costs (rtx x, int codearg, int o
break;
case HImode:
+ if (AVR_HAVE_MUL)
+ {
+ if (const_2_to_7_operand (XEXP (x, 1), HImode)
+ && (SIGN_EXTEND == GET_CODE (XEXP (x, 0))
+ || ZERO_EXTEND == GET_CODE (XEXP (x, 0))))
+ {
+ *total = COSTS_N_INSNS (!speed ? 4 : 6);
+ return true;
+ }
+ }
+
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
{
*total = COSTS_N_INSNS (!speed ? 5 : 41);
@@ -6881,4 +6927,29 @@ avr_expand_builtin (tree exp, rtx target
}
+/* FIXME: We compose some insns by means of insn combine
+ and split them in split1. We don't want IRA/reload
+ to combine them to the original insns again because
+ that prevents some CSE optimizations if constants are
+ involved. If IRA/reload combines, the recombined
+ insns get split again after reload, but then CSE
+ does not take place.
+ It appears that at present there is no other way
+ to take away the insn from IRA. Notice that split1
+ runs unconditionally so that all our insns will get
+ split regardless of command line options. */
+
+#include "tree-pass.h"
+
+bool
+avr_gate_split1 (void)
+{
+ if (current_pass->static_pass_number /* true only while the running pass is numbered below pass_match_asm_constraints */
+ < pass_match_asm_constraints.pass.static_pass_number)
+ return true;
+
+ return false;
+}
+
+
#include "gt-avr.h"
/* int operand times 16, a power of two. */
int mul16_16 (int x)
{
return x * 16;
}
/* int operand times the small constant 17. */
int mul16_17 (int x)
{
return x * 17;
}
/* int operand times the small constant 126. */
int mul16_126 (int x)
{
return x * 126;
}
/* char operand times 55; the product is returned as int. */
int mul8_55 (char x)
{
return x * 55;
}
/* char operand times 126; the product is returned as int. */
int mul8_126 (char x)
{
return x * 126;
}
/* char operand times 155, a constant above 127 but below 256. */
int mul8_155 (char x)
{
return x * 155;
}
/* char operand times 2; the product is returned as int. */
int mul8_2 (char x)
{
return x * 2;
}
/* char operand times 4; the product is returned as int. */
int mul8_4 (char x)
{
return x * 4;
}
/* unsigned char operand times 4; the product is returned as int. */
int umul8_4 (unsigned char x)
{
return x * 4;
}
/* char operand times 3; the product is returned as int. */
int mul8_3 (char x)
{
return x * 3;
}
/* unsigned char operand times 3; the product is returned as unsigned int. */
unsigned int umul8_3 (unsigned char x)
{
return x * 3;
}
/* Global written by imul8_15, cmul_15, ymul8_15 and qmul8_xy. */
int y15;
/* Stores the constant 15 to y15 and returns the int operand times that
   same constant. */
int imul8_15 (int x)
{
y15 = 15;
return x * 15;
}
/* y15 is declared a second time here; x15 is the second result global. */
int y15;
int x15;
/* Multiplies two char operands by the same constant 15 and stores the
   products to y15 and x15. */
void cmul_15 (char x, char y)
{
y15 = y * 15;
x15 = x * 15;
}
/* Multiplies two int operands by the same constant 15 and stores the
   products to y15 and x15. */
void ymul8_15 (int x, int y)
{
y15 = y * 15;
x15 = x * 15;
}
/* Multiplies two int operands by the same char operand c and stores the
   products to y15 and x15. */
void qmul8_xy (char c, int x, int y)
{
y15 = y * c;
x15 = x * c;
}
/* unsigned int operand times 5.  NOTE: the factor is 5 although the
   function name says 4. */
unsigned int uimul8_4 (unsigned int x)
{
return x * 5;
}
/* char operand times -155, a constant below -128. */
int mul8_m155 (char x)
{
return x * (-155);
}
/* char operand times -64, a negated power of two. */
int mul8_m64 (char x)
{
return x * (-64);
}
/* char operand times unsigned char operand; the product is returned as int. */
int mul_s8u8 (char a, unsigned char b)
{
return a*b;
}
; Compiler output for wmul.c. None of the patterns added by the patch
; (muluqihi3, mulsqihi3, *mulsu) is named in this listing.
.file "wmul.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0 ; register number 0, i.e. r0
__zero_reg__ = 1 ; register number 1, i.e. r1 (cleared again after each multiply below)
.text
; mul16_16: x * 16 as a four-pass loop, each pass shifting r25:r24 left by one bit.
.global mul16_16
.type mul16_16, @function
mul16_16:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,4 ; 26 *ashlhi3_const/5 [length = 5]
1: lsl r24
rol r25
dec r18
brne 1b
/* epilogue start */
ret ; 24 return [length = 1]
.size mul16_16, .-mul16_16
; mul16_17: x * 17 computed as (x << 4) + x, using r19:r18 as the working copy.
.global mul16_17
.type mul16_17, @function
mul16_17:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
movw r18,r24 ; 25 *movhi/1 [length = 1]
ldi r20,4 ; 30 *ashlhi3_const/5 [length = 5]
1: lsl r18
rol r19
dec r20
brne 1b
add r18,r24 ; 9 *addhi3/1 [length = 2]
adc r19,r25
movw r24,r18 ; 31 *movhi/1 [length = 1]
/* epilogue start */
ret ; 28 return [length = 1]
.size mul16_17, .-mul16_17
; mul16_126: x * 126 computed as a shifted copy of x in r25:r24 minus (x << 1) in r21:r20.
.global mul16_126
.type mul16_126, @function
mul16_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
movw r20,r24 ; 31 *movhi/1 [length = 1]
lsl r20 ; 38 *ashlhi3_const/2 [length = 2]
rol r21
lsr r25 ; 39 *ashlhi3_const/5 [length = 5]
mov r25,r24
clr r24
ror r25
ror r24
sub r24,r20 ; 10 subhi3/1 [length = 2]
sbc r25,r21
/* epilogue start */
ret ; 35 return [length = 1]
.size mul16_126, .-mul16_126
; mul8_55: 55 is loaded into r25 and a single MULS (mulqihi3) forms the 16-bit product.
.global mul8_55
.type mul8_55, @function
mul8_55:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(55) ; 7 *movqi/2 [length = 1]
muls r24,r25 ; 8 mulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 26 return [length = 1]
.size mul8_55, .-mul8_55
; mul8_126: x is sign-extended into r25:r24 first, then the same shift-and-subtract
; sequence as in mul16_126 is used; no multiply instruction.
.global mul8_126
.type mul8_126, @function
mul8_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r25 ; 7 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
movw r18,r24 ; 28 *movhi/1 [length = 1]
lsl r18 ; 33 *ashlhi3_const/2 [length = 2]
rol r19
lsr r25 ; 34 *ashlhi3_const/5 [length = 5]
mov r25,r24
clr r24
ror r25
ror r24
sub r24,r18 ; 12 subhi3/1 [length = 2]
sbc r25,r19
/* epilogue start */
ret ; 31 return [length = 1]
.size mul8_126, .-mul8_126
; mul8_155: x is sign-extended to r21:r20, 155 is loaded as a 16-bit constant and a
; full 16x16 multiply (*mulhi3_enh, three MUL instructions) is used.
.global mul8_155
.type mul8_155, @function
mul8_155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
mov r20,r24 ; 6 extendqihi2/2 [length = 4]
clr r21
sbrc r20,7
com r21
ldi r18,lo8(155) ; 7 *movhi/4 [length = 2]
ldi r19,hi8(155)
mul r20,r18 ; 8 *mulhi3_enh [length = 7]
movw r24,r0
mul r20,r19
add r25,r0
mul r21,r18
add r25,r0
clr r1
/* epilogue start */
ret ; 26 return [length = 1]
.size mul8_155, .-mul8_155
; mul8_2: sign-extension of x into r25:r24 followed by one 16-bit left shift.
.global mul8_2
.type mul8_2, @function
mul8_2:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r25 ; 7 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
lsl r24 ; 28 *ashlhi3_const/2 [length = 2]
rol r25
/* epilogue start */
ret ; 26 return [length = 1]
.size mul8_2, .-mul8_2
; mul8_4: sign-extension of x into r25:r24 followed by two 16-bit left shifts.
.global mul8_4
.type mul8_4, @function
mul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r25 ; 7 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
lsl r24 ; 28 *ashlhi3_const/4 [length = 4]
rol r25
lsl r24
rol r25
/* epilogue start */
ret ; 26 return [length = 1]
.size mul8_4, .-mul8_4
; umul8_4: the high byte r25 is set to 0, then r25:r24 is shifted left twice.
.global umul8_4
.type umul8_4, @function
umul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(0) ; 25 *movqi/1 [length = 1]
lsl r24 ; 30 *ashlhi3_const/4 [length = 4]
rol r25
lsl r24
rol r25
/* epilogue start */
ret ; 28 return [length = 1]
.size umul8_4, .-umul8_4
; mul8_3: x is sign-extended to r19:r18; the result is (x << 1) + x.
.global mul8_3
.type mul8_3, @function
mul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
mov r18,r24 ; 7 extendqihi2/2 [length = 4]
clr r19
sbrc r18,7
com r19
movw r24,r18 ; 27 *movhi/1 [length = 1]
lsl r24 ; 32 *ashlhi3_const/2 [length = 2]
rol r25
add r24,r18 ; 11 *addhi3/1 [length = 2]
adc r25,r19
/* epilogue start */
ret ; 30 return [length = 1]
.size mul8_3, .-mul8_3
; umul8_3: x is zero-extended to r19:r18; the result is (x << 1) + x.
.global umul8_3
.type umul8_3, @function
umul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
mov r18,r24 ; 28 *movqi/1 [length = 1]
ldi r19,lo8(0) ; 29 *movqi/1 [length = 1]
movw r24,r18 ; 27 *movhi/1 [length = 1]
lsl r24 ; 34 *ashlhi3_const/2 [length = 2]
rol r25
add r24,r18 ; 11 *addhi3/1 [length = 2]
adc r25,r19
/* epilogue start */
ret ; 32 return [length = 1]
.size umul8_3, .-umul8_3
; imul8_15: stores the 16-bit constant 15 to y15, then computes x * 15 as (x << 4) - x.
.global imul8_15
.type imul8_15, @function
imul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(15) ; 6 *movhi/4 [length = 2]
ldi r19,hi8(15)
sts y15+1,r19 ; 7 *movhi/3 [length = 4]
sts y15,r18
movw r18,r24 ; 27 *movhi/1 [length = 1]
ldi r22,4 ; 32 *ashlhi3_const/5 [length = 5]
1: lsl r18
rol r19
dec r22
brne 1b
sub r18,r24 ; 11 subhi3/1 [length = 2]
sbc r19,r25
movw r24,r18 ; 33 *movhi/1 [length = 1]
/* epilogue start */
ret ; 30 return [length = 1]
.size imul8_15, .-imul8_15
; cmul_15: for each of the two char inputs (r22 first, then r24): sign-extend,
; compute (v << 4) - v with a shift loop, store the result to y15 resp. x15.
.global cmul_15
.type cmul_15, @function
cmul_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r23 ; 8 extendqihi2/1 [length = 3]
sbrc r22,7
com r23
movw r18,r22 ; 24 *movhi/1 [length = 1]
ldi r30,4 ; 30 *ashlhi3_const/5 [length = 5]
1: lsl r18
rol r19
dec r30
brne 1b
sub r18,r22 ; 12 subhi3/1 [length = 2]
sbc r19,r23
sts y15+1,r19 ; 13 *movhi/3 [length = 4]
sts y15,r18
clr r25 ; 15 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
movw r18,r24 ; 25 *movhi/1 [length = 1]
ldi r31,4 ; 31 *ashlhi3_const/5 [length = 5]
1: lsl r18
rol r19
dec r31
brne 1b
sub r18,r24 ; 19 subhi3/1 [length = 2]
sbc r19,r25
sts x15+1,r19 ; 20 *movhi/3 [length = 4]
sts x15,r18
/* epilogue start */
ret ; 28 return [length = 1]
.size cmul_15, .-cmul_15
; ymul8_15: for each of the two 16-bit inputs (r23:r22 first, then r25:r24):
; compute (v << 4) - v with a shift loop and store the result to y15 resp. x15.
.global ymul8_15
.type ymul8_15, @function
ymul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
movw r18,r22 ; 20 *movhi/1 [length = 1]
ldi r26,4 ; 26 *ashlhi3_const/5 [length = 5]
1: lsl r18
rol r19
dec r26
brne 1b
sub r18,r22 ; 10 subhi3/1 [length = 2]
sbc r19,r23
sts y15+1,r19 ; 11 *movhi/3 [length = 4]
sts y15,r18
movw r18,r24 ; 21 *movhi/1 [length = 1]
ldi r27,4 ; 27 *ashlhi3_const/5 [length = 5]
1: lsl r18
rol r19
dec r27
brne 1b
sub r18,r24 ; 15 subhi3/1 [length = 2]
sbc r19,r25
sts x15+1,r19 ; 16 *movhi/3 [length = 4]
sts x15,r18
/* epilogue start */
ret ; 24 return [length = 1]
.size ymul8_15, .-ymul8_15
; qmul8_xy: c is sign-extended into r25:r24 once, then two full 16x16 multiplies
; (*mulhi3_enh) produce the values stored to y15 and x15.
.global qmul8_xy
.type qmul8_xy, @function
qmul8_xy:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r25 ; 8 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
mul r20,r24 ; 9 *mulhi3_enh [length = 7]
movw r18,r0
mul r20,r25
add r19,r0
mul r21,r24
add r19,r0
clr r1
sts y15+1,r19 ; 10 *movhi/3 [length = 4]
sts y15,r18
mul r22,r24 ; 11 *mulhi3_enh [length = 7]
movw r18,r0
mul r22,r25
add r19,r0
mul r23,r24
add r19,r0
clr r1
sts x15+1,r19 ; 12 *movhi/3 [length = 4]
sts x15,r18
/* epilogue start */
ret ; 18 return [length = 1]
.size qmul8_xy, .-qmul8_xy
; uimul8_4: x * 5 computed as (x << 2) + x, using r19:r18 as the working copy.
.global uimul8_4
.type uimul8_4, @function
uimul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
movw r18,r24 ; 25 *movhi/1 [length = 1]
lsl r18 ; 30 *ashlhi3_const/4 [length = 4]
rol r19
lsl r18
rol r19
add r18,r24 ; 9 *addhi3/1 [length = 2]
adc r19,r25
movw r24,r18 ; 31 *movhi/1 [length = 1]
/* epilogue start */
ret ; 28 return [length = 1]
.size uimul8_4, .-uimul8_4
; mul8_m155: x is sign-extended to r21:r20, -155 is loaded as a 16-bit constant and a
; full 16x16 multiply (*mulhi3_enh) is used.
.global mul8_m155
.type mul8_m155, @function
mul8_m155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
mov r20,r24 ; 6 extendqihi2/2 [length = 4]
clr r21
sbrc r20,7
com r21
ldi r18,lo8(-155) ; 7 *movhi/4 [length = 2]
ldi r19,hi8(-155)
mul r20,r18 ; 8 *mulhi3_enh [length = 7]
movw r24,r0
mul r20,r19
add r25,r0
mul r21,r18
add r25,r0
clr r1
/* epilogue start */
ret ; 26 return [length = 1]
.size mul8_m155, .-mul8_m155
; mul8_m64: x is sign-extended into r25:r24, negated (neghi2), then shifted left
; six times in a loop.
.global mul8_m64
.type mul8_m64, @function
mul8_m64:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r25 ; 7 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
com r25 ; 9 neghi2/1 [length = 3]
neg r24
sbci r25,lo8(-1)
ldi r21,6 ; 31 *ashlhi3_const/5 [length = 5]
1: lsl r24
rol r25
dec r21
brne 1b
/* epilogue start */
ret ; 29 return [length = 1]
.size mul8_m64, .-mul8_m64
; mul_s8u8: a is sign-extended to r19:r18, b is zero-extended (r23 = 0), and a full
; 16x16 multiply (*mulhi3_enh) is used.
.global mul_s8u8
.type mul_s8u8, @function
mul_s8u8:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
mov r18,r24 ; 7 extendqihi2/2 [length = 4]
clr r19
sbrc r18,7
com r19
ldi r23,lo8(0) ; 26 *movqi/1 [length = 1]
mul r18,r22 ; 9 *mulhi3_enh [length = 7]
movw r24,r0
mul r18,r23
add r25,r0
mul r19,r22
add r25,r0
clr r1
/* epilogue start */
ret ; 29 return [length = 1]
.size mul_s8u8, .-mul_s8u8
; x15 and y15 are the globals stored to by the functions above.
.comm x15,2,1
.comm y15,2,1
.ident "GCC: (GNU) 4.7.0 20110711 (experimental)"
.global __do_clear_bss
; Compiler output for wmul.c in which the patterns added by the patch
; (muluqihi3, mulsqihi3, *mulsu) are named.
.file "wmul.c"
__SREG__ = 0x3f
__SP_H__ = 0x3e
__SP_L__ = 0x3d
__tmp_reg__ = 0 ; register number 0, i.e. r0
__zero_reg__ = 1 ; register number 1, i.e. r1 (cleared again after each multiply below)
.text
; mul16_16: still the four-pass shift loop on r25:r24; no multiply instruction is used.
.global mul16_16
.type mul16_16, @function
mul16_16:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,4 ; 26 *ashlhi3_const/5 [length = 5]
1: lsl r24
rol r25
dec r18
brne 1b
/* epilogue start */
ret ; 24 return [length = 1]
.size mul16_16, .-mul16_16
; mul16_17: 17 is loaded as an 8-bit constant into r18 and multiplied with the
; 16-bit x (copied to r21:r20) by muluqihi3, i.e. two MUL instructions.
.global mul16_17
.type mul16_17, @function
mul16_17:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(17) ; 6 *movqi/2 [length = 1]
movw r20,r24 ; 23 *movhi/1 [length = 1]
mul r18,r20 ; 7 muluqihi3 [length = 5]
movw r24,r0
mul r18,r21
add r25,r0
clr __zero_reg__
/* epilogue start */
ret ; 26 return [length = 1]
.size mul16_17, .-mul16_17
; mul16_126: 126 is loaded as an 8-bit constant into r18 and multiplied with the
; 16-bit x (copied to r21:r20) by muluqihi3, i.e. two MUL instructions.
.global mul16_126
.type mul16_126, @function
mul16_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(126) ; 6 *movqi/2 [length = 1]
movw r20,r24 ; 23 *movhi/1 [length = 1]
mul r18,r20 ; 7 muluqihi3 [length = 5]
movw r24,r0
mul r18,r21
add r25,r0
clr __zero_reg__
/* epilogue start */
ret ; 26 return [length = 1]
.size mul16_126, .-mul16_126
; mul8_55: 55 is loaded into r25 and a single MULS (mulqihi3) forms the 16-bit product.
.global mul8_55
.type mul8_55, @function
mul8_55:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(55) ; 23 *movqi/2 [length = 1]
muls r24,r25 ; 24 mulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 28 return [length = 1]
.size mul8_55, .-mul8_55
; mul8_126: 126 is loaded into r25 and a single MULS (mulqihi3) forms the 16-bit product.
.global mul8_126
.type mul8_126, @function
mul8_126:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(126) ; 23 *movqi/2 [length = 1]
muls r24,r25 ; 24 mulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 28 return [length = 1]
.size mul8_126, .-mul8_126
; mul8_155: the constant is loaded as the 8-bit value lo8(-101) into r18, x is copied
; to r19, and a single MULSU (*mulsu) forms the 16-bit product.
.global mul8_155
.type mul8_155, @function
mul8_155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(-101) ; 23 *movqi/2 [length = 1]
mov r19,r24 ; 26 *movqi/1 [length = 1]
mulsu r19,r18 ; 24 *mulsu [length = 3]
movw r24,r0
clr __zero_reg__
/* epilogue start */
ret ; 29 return [length = 1]
.size mul8_155, .-mul8_155
; mul8_2: still sign-extension of x into r25:r24 followed by one 16-bit left shift.
.global mul8_2
.type mul8_2, @function
mul8_2:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r25 ; 7 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
lsl r24 ; 28 *ashlhi3_const/2 [length = 2]
rol r25
/* epilogue start */
ret ; 26 return [length = 1]
.size mul8_2, .-mul8_2
; mul8_4: 4 is loaded into r25 and a single MULS (mulqihi3) forms the 16-bit product.
.global mul8_4
.type mul8_4, @function
mul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(4) ; 23 *movqi/2 [length = 1]
muls r24,r25 ; 24 mulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 28 return [length = 1]
.size mul8_4, .-mul8_4
; umul8_4: 4 is loaded into r25 and a single MUL (umulqihi3) forms the 16-bit product.
.global umul8_4
.type umul8_4, @function
umul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(4) ; 23 *movqi/2 [length = 1]
mul r24,r25 ; 24 umulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 28 return [length = 1]
.size umul8_4, .-umul8_4
; mul8_3: 3 is loaded into r25 and a single MULS (mulqihi3) forms the 16-bit product.
.global mul8_3
.type mul8_3, @function
mul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(3) ; 23 *movqi/2 [length = 1]
muls r24,r25 ; 24 mulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 28 return [length = 1]
.size mul8_3, .-mul8_3
; umul8_3: 3 is loaded into r25 and a single MUL (umulqihi3) forms the 16-bit product.
.global umul8_3
.type umul8_3, @function
umul8_3:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(3) ; 23 *movqi/2 [length = 1]
mul r24,r25 ; 24 umulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 28 return [length = 1]
.size umul8_3, .-umul8_3
; imul8_15: stores the 16-bit constant 15 to y15, then loads 15 again as an 8-bit
; constant into r18 and multiplies it with x (copied to r21:r20) by muluqihi3.
.global imul8_15
.type imul8_15, @function
imul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(15) ; 6 *movhi/4 [length = 2]
ldi r19,hi8(15)
sts y15+1,r19 ; 7 *movhi/3 [length = 4]
sts y15,r18
ldi r18,lo8(15) ; 8 *movqi/2 [length = 1]
movw r20,r24 ; 25 *movhi/1 [length = 1]
mul r18,r20 ; 9 muluqihi3 [length = 5]
movw r24,r0
mul r18,r21
add r25,r0
clr __zero_reg__
/* epilogue start */
ret ; 28 return [length = 1]
.size imul8_15, .-imul8_15
; cmul_15: 15 is loaded once into r25 and used by both MULS (mulqihi3) instructions;
; the products are stored to y15 and x15.
.global cmul_15
.type cmul_15, @function
cmul_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(15) ; 17 *movqi/2 [length = 1]
muls r22,r25 ; 18 mulqihi3 [length = 3]
movw r22,r0
clr r1
sts y15+1,r23 ; 10 *movhi/3 [length = 4]
sts y15,r22
muls r24,r25 ; 20 mulqihi3 [length = 3]
movw r24,r0
clr r1
sts x15+1,r25 ; 14 *movhi/3 [length = 4]
sts x15,r24
/* epilogue start */
ret ; 24 return [length = 1]
.size cmul_15, .-cmul_15
; ymul8_15: 15 is loaded once into r18 and used by both muluqihi3 sequences;
; the products are stored to y15 and x15.
.global ymul8_15
.type ymul8_15, @function
ymul8_15:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(15) ; 7 *movqi/2 [length = 1]
mul r18,r22 ; 8 muluqihi3 [length = 5]
movw r20,r0
mul r18,r23
add r21,r0
clr __zero_reg__
sts y15+1,r21 ; 9 *movhi/3 [length = 4]
sts y15,r20
movw r20,r24 ; 16 *movhi/1 [length = 1]
mul r18,r20 ; 11 muluqihi3 [length = 5]
movw r24,r0
mul r18,r21
add r25,r0
clr __zero_reg__
sts x15+1,r25 ; 12 *movhi/3 [length = 4]
sts x15,r24
/* epilogue start */
ret ; 19 return [length = 1]
.size ymul8_15, .-ymul8_15
; qmul8_xy: c is still sign-extended into r25:r24 and two full 16x16 multiplies
; (*mulhi3_enh) are used; none of the new widening patterns appears here.
.global qmul8_xy
.type qmul8_xy, @function
qmul8_xy:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
clr r25 ; 8 extendqihi2/1 [length = 3]
sbrc r24,7
com r25
mul r20,r24 ; 9 *mulhi3_enh [length = 7]
movw r18,r0
mul r20,r25
add r19,r0
mul r21,r24
add r19,r0
clr r1
sts y15+1,r19 ; 10 *movhi/3 [length = 4]
sts y15,r18
mul r22,r24 ; 11 *mulhi3_enh [length = 7]
movw r18,r0
mul r22,r25
add r19,r0
mul r23,r24
add r19,r0
clr r1
sts x15+1,r19 ; 12 *movhi/3 [length = 4]
sts x15,r18
/* epilogue start */
ret ; 18 return [length = 1]
.size qmul8_xy, .-qmul8_xy
; uimul8_4: 5 is loaded as an 8-bit constant into r18 and multiplied with the
; 16-bit x (copied to r21:r20) by muluqihi3.
.global uimul8_4
.type uimul8_4, @function
uimul8_4:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(5) ; 6 *movqi/2 [length = 1]
movw r20,r24 ; 23 *movhi/1 [length = 1]
mul r18,r20 ; 7 muluqihi3 [length = 5]
movw r24,r0
mul r18,r21
add r25,r0
clr __zero_reg__
/* epilogue start */
ret ; 26 return [length = 1]
.size uimul8_4, .-uimul8_4
; mul8_m155: -155 is loaded as a 16-bit constant into r19:r18, x is copied to r20
; without sign-extension, and mulsqihi3 (MULSU + MUL) forms the product.
.global mul8_m155
.type mul8_m155, @function
mul8_m155:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r18,lo8(-155) ; 7 *movhi/4 [length = 2]
ldi r19,hi8(-155)
mov r20,r24 ; 24 *movqi/1 [length = 1]
mulsu r20,r18 ; 8 mulsqihi3 [length = 5]
movw r24,r0
mul r20,r19
add r25,r0
clr __zero_reg__
/* epilogue start */
ret ; 27 return [length = 1]
.size mul8_m155, .-mul8_m155
; mul8_m64: -64 is loaded into r25 and a single MULS (mulqihi3) forms the 16-bit product.
.global mul8_m64
.type mul8_m64, @function
mul8_m64:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
ldi r25,lo8(-64) ; 23 *movqi/2 [length = 1]
muls r24,r25 ; 24 mulqihi3 [length = 3]
movw r24,r0
clr r1
/* epilogue start */
ret ; 28 return [length = 1]
.size mul8_m64, .-mul8_m64
; mul_s8u8: a is copied to r18 and a single MULSU (*mulsu) with b in r22 forms the
; 16-bit product; neither operand is extended beforehand.
.global mul_s8u8
.type mul_s8u8, @function
mul_s8u8:
/* prologue: function */
/* frame size = 0 */
/* stack size = 0 */
.L__stack_usage = 0
mov r18,r24 ; 25 *movqi/1 [length = 1]
mulsu r18,r22 ; 9 *mulsu [length = 3]
movw r24,r0
clr __zero_reg__
/* epilogue start */
ret ; 28 return [length = 1]
.size mul_s8u8, .-mul_s8u8
; x15 and y15 are the globals stored to by the functions above.
.comm x15,2,1
.comm y15,2,1
.ident "GCC: (GNU) 4.7.0 20110711 (experimental)"
.global __do_clear_bss