This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch,AVR]: Fix PR50910: int/2 leads to libgcc call
- From: Georg-Johann Lay <avr at gjlay dot de>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Denis Chertykov <chertykov at gmail dot com>, Eric Weddington <eric dot weddington at atmel dot com>, Anatoly Sokolov <aesok at post dot ru>
- Date: Mon, 31 Oct 2011 19:28:53 +0100
- Subject: [Patch,AVR]: Fix PR50910: int/2 leads to libgcc call
This is a fix for optimization flaw when dividing int by 2.
There is really no need for a library call. Costs of [U]DIV/[U]MOD are adjusted
to take into account the costs of CONST_INT operands that must be loaded for
division by means of libgcc call.
There are some new combiner patterns suffixed .lt0 that so adjustment
frequently seen when division-by-const in lowered to arithmetic in order to
avoid more expensive libcall.
Moreover, there are two patterns for adding sign-extended QI to HI. These
patterns are shorter, faster and have lower register pressure than explicitly
sign-extending the QI before adding it. Example code is:
int add (int a, char b) { return a + b; }
int sub (int a, char b) { return a - b; }
add:
add r24,r22 ; 13 *addhi3.sign_extend1 [length = 4]
adc r25,__zero_reg__
sbrc r22,7
dec r25
ret
sub:
sub r24,r22 ; 13 *subhi3.sign_extend2 [length = 4]
sbc r25,__zero_reg__
sbrc r22,7
inc r25
ret
The reg_overlap_mentioned case is just for pathological code like, e.g.
a + (char) a
so that the expected size is 4 instructions.
Since beginning of time, BRANCH_COST was set to 0 so that some optimization
passes make code happily jumping around. The patch introduces a new command
line option for that; mainly because I don't know the rationale behind setting
BRANCH_COST to 0.
Regression-tested.
Ok for trunk?
Johann
* config/avr/avr.opt (-mbranch-cost=): New option.
* config/avr/avr.h (BRANCH_COST): Define to avr_branch_cost.
* config/avr/avr.c (avr_rtx_costs_1): Adjust [U]DIV/[U]MOD costs.
* config/avr/avr.md (*addqi3.lt0, *addhi3.lt0, *addsi3.lt0): New insns.
(*addhi3_zero_extend1): Remov % in constraint of operand 1.
(*addhi3.sign_extend1, *subhi3.sign_extend2): New insns.
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md (revision 180654)
+++ config/avr/avr.md (working copy)
@@ -776,27 +776,36 @@ (define_expand "addhi3"
(define_insn "*addhi3_zero_extend"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (plus:HI (zero_extend:HI
- (match_operand:QI 1 "register_operand" "r"))
- (match_operand:HI 2 "register_operand" "0")))]
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:HI 2 "register_operand" "0")))]
""
- "add %A0,%1
- adc %B0,__zero_reg__"
+ "add %A0,%1\;adc %B0,__zero_reg__"
[(set_attr "length" "2")
(set_attr "cc" "set_n")])
(define_insn "*addhi3_zero_extend1"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (plus:HI (match_operand:HI 1 "register_operand" "%0")
- (zero_extend:HI
- (match_operand:QI 2 "register_operand" "r"))))]
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (match_operand:HI 1 "register_operand" "0")
+ (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
""
- "add %A0,%2
- adc %B0,__zero_reg__"
+ "add %A0,%2\;adc %B0,__zero_reg__"
[(set_attr "length" "2")
(set_attr "cc" "set_n")])
+(define_insn "*addhi3.sign_extend1"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (plus:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:HI 2 "register_operand" "0")))]
+ ""
+ {
+ return reg_overlap_mentioned_p (operands[0], operands[1])
+ ? "mov __tmp_reg__,%1\;add %A0,%1\;adc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;dec %B0"
+ : "add %A0,%1\;adc %B0,__zero_reg__\;sbrc %1,7\;dec %B0";
+ }
+ [(set_attr "length" "5")
+ (set_attr "cc" "clobber")])
+
(define_insn "*addhi3_sp"
[(set (match_operand:HI 1 "stack_register_operand" "=q")
(plus:HI (match_operand:HI 2 "stack_register_operand" "q")
@@ -956,6 +965,19 @@ (define_insn "*subhi3_zero_extend1"
[(set_attr "length" "2")
(set_attr "cc" "set_czn")])
+(define_insn "*subhi3.sign_extend2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (match_operand:HI 1 "register_operand" "0")
+ (sign_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
+ ""
+ {
+ return reg_overlap_mentioned_p (operands[0], operands[2])
+ ? "mov __tmp_reg__,%2\;sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;inc %B0"
+ : "sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc %2,7\;inc %B0";
+ }
+ [(set_attr "length" "5")
+ (set_attr "cc" "clobber")])
+
(define_insn "subsi3"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (match_operand:SI 1 "register_operand" "0")
@@ -1054,6 +1076,41 @@ (define_insn "*subqi3.ashiftrt7"
[(set_attr "length" "2")
(set_attr "cc" "clobber")])
+(define_insn "*addqi3.lt0"
+ [(set (match_operand:QI 0 "register_operand" "=r")
+ (plus:QI (lt:QI (match_operand:QI 1 "register_operand" "r")
+ (const_int 0))
+ (match_operand:QI 2 "register_operand" "0")))]
+ ""
+ "sbrc %1,7\;inc %0"
+ [(set_attr "length" "2")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*addhi3.lt0"
+ [(set (match_operand:HI 0 "register_operand" "=w,r")
+ (plus:HI (lt:HI (match_operand:QI 1 "register_operand" "r,r")
+ (const_int 0))
+ (match_operand:HI 2 "register_operand" "0,0")))
+ (clobber (match_scratch:QI 3 "=X,&1"))]
+ ""
+ "@
+ sbrc %1,7\;adiw %0,1
+ lsl %1\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__"
+ [(set_attr "length" "2,3")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*addsi3.lt0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 31))
+ (match_operand:SI 2 "register_operand" "0")))]
+ ""
+ "mov __tmp_reg__,%D1\;lsl __tmp_reg__
+ adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__"
+ [(set_attr "length" "6")
+ (set_attr "cc" "clobber")])
+
+
;; "umulqihi3"
;; "mulqihi3"
(define_insn "<extend_u>mulqihi3"
Index: config/avr/avr.opt
===================================================================
--- config/avr/avr.opt (revision 180654)
+++ config/avr/avr.opt (working copy)
@@ -40,6 +40,10 @@ mno-interrupts
Target Report RejectNegative Mask(NO_INTERRUPTS)
Change the stack pointer without disabling interrupts
+mbranch-cost=
+Target Report Joined RejectNegative UInteger Var(avr_branch_cost) Init(0)
+Set the branch costs for conditional branch instructions. Reasonable values are small, non-negative integers. The default branch cost is 0.
+
morder1
Target Report Undocumented Mask(ORDER_1)
@@ -69,3 +73,7 @@ Accumulate outgoing function arguments a
mstrict-X
Target Report Var(avr_strict_X) Init(0)
When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.
+
+mbranch-cost=
+Target Report RejectNegative Joined UInteger Var(avr_branch_cost) Init(0)
+Set the cost of a branch instruction. Default value is 0.
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c (revision 180654)
+++ config/avr/avr.c (working copy)
@@ -6477,11 +6477,16 @@ avr_rtx_costs_1 (rtx x, int codearg, int
case UDIV:
case UMOD:
if (!speed)
- *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
else
- return false;
+ *total = COSTS_N_INSNS (15 * GET_MODE_SIZE (mode));
*total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
- *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ /* For div/mod with const-int divisor we have at least the cost of
+ loading the divisor. */
+ if (CONST_INT_P (XEXP (x, 1)))
+ *total += COSTS_N_INSNS (GET_MODE_SIZE (mode));
+ /* Add some overall penaly for clobbering and moving around registers */
+ *total += COSTS_N_INSNS (2);
return true;
case ROTATE:
Index: config/avr/avr.h
===================================================================
--- config/avr/avr.h (revision 180654)
+++ config/avr/avr.h (working copy)
@@ -385,7 +385,7 @@ typedef struct avr_args {
} \
} while (0)
-#define BRANCH_COST(speed_p, predictable_p) 0
+#define BRANCH_COST(speed_p, predictable_p) avr_branch_cost
#define SLOW_BYTE_ACCESS 0