This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch,AVR]: Fix PR50910: int/2 leads to libgcc call


This is a fix for optimization flaw when dividing int by 2.

There is really no need for a library call. Costs of [U]DIV/[U]MOD are adjusted
to take into account the costs of CONST_INT operands that must be loaded for
division by means of libgcc call.

There are some new combiner patterns suffixed .lt0 that so adjustment
frequently seen when division-by-const in lowered to arithmetic in order to
avoid more expensive libcall.

Moreover, there are two patterns for adding sign-extended QI to HI. These
patterns are shorter, faster and have lower register pressure than explicitly
sign-extending the QI before adding it.  Example code is:

int add (int a, char b) { return a + b; }
int sub (int a, char b) { return a - b; }

add:
	add r24,r22	 ;  13	*addhi3.sign_extend1	[length = 4]
	adc r25,__zero_reg__
	sbrc r22,7
	dec r25
	ret

sub:
	sub r24,r22	 ;  13	*subhi3.sign_extend2	[length = 4]
	sbc r25,__zero_reg__
	sbrc r22,7
	inc r25
	ret

The reg_overlap_mentioned case is just for pathological code like, e.g.
   a + (char) a
so that the expected size is 4 instructions.

Since beginning of time, BRANCH_COST was set to 0 so that some optimization
passes make code happily jumping around. The patch introduces a new command
line option for that; mainly because I don't know the rationale behind setting
BRANCH_COST to 0.

Regression-tested.

Ok for trunk?

Johann

	* config/avr/avr.opt (-mbranch-cost=): New option.
	* config/avr/avr.h (BRANCH_COST): Define to avr_branch_cost.
	* config/avr/avr.c (avr_rtx_costs_1): Adjust [U]DIV/[U]MOD costs.
	* config/avr/avr.md (*addqi3.lt0, *addhi3.lt0, *addsi3.lt0): New insns.
	(*addhi3_zero_extend1): Remov % in constraint of operand 1.
	(*addhi3.sign_extend1, *subhi3.sign_extend2): New insns.
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 180654)
+++ config/avr/avr.md	(working copy)
@@ -776,27 +776,36 @@ (define_expand "addhi3"
 
 
 (define_insn "*addhi3_zero_extend"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-	(plus:HI (zero_extend:HI
-		  (match_operand:QI 1 "register_operand" "r"))
-		 (match_operand:HI 2 "register_operand" "0")))]
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+                 (match_operand:HI 2 "register_operand"                 "0")))]
   ""
-  "add %A0,%1
-	adc %B0,__zero_reg__"
+  "add %A0,%1\;adc %B0,__zero_reg__"
   [(set_attr "length" "2")
    (set_attr "cc" "set_n")])
 
 (define_insn "*addhi3_zero_extend1"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-	(plus:HI (match_operand:HI 1 "register_operand" "%0")
-		 (zero_extend:HI
-		  (match_operand:QI 2 "register_operand" "r"))))]
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (plus:HI (match_operand:HI 1 "register_operand"                 "0")
+                 (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
   ""
-  "add %A0,%2
-	adc %B0,__zero_reg__"
+  "add %A0,%2\;adc %B0,__zero_reg__"
   [(set_attr "length" "2")
    (set_attr "cc" "set_n")])
 
+(define_insn "*addhi3.sign_extend1"
+  [(set (match_operand:HI 0 "register_operand"                         "=r")
+        (plus:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "r"))
+                 (match_operand:HI 2 "register_operand"                 "0")))]
+  ""
+  {
+    return reg_overlap_mentioned_p (operands[0], operands[1])
+      ? "mov __tmp_reg__,%1\;add %A0,%1\;adc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;dec %B0"
+      : "add %A0,%1\;adc %B0,__zero_reg__\;sbrc %1,7\;dec %B0";
+  }
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
 (define_insn "*addhi3_sp"
   [(set (match_operand:HI 1 "stack_register_operand"           "=q")
         (plus:HI (match_operand:HI 2 "stack_register_operand"   "q")
@@ -956,6 +965,19 @@ (define_insn "*subhi3_zero_extend1"
   [(set_attr "length" "2")
    (set_attr "cc" "set_czn")])
 
+(define_insn "*subhi3.sign_extend2"
+  [(set (match_operand:HI 0 "register_operand"                          "=r")
+        (minus:HI (match_operand:HI 1 "register_operand"                 "0")
+                  (sign_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
+  ""
+  {
+    return reg_overlap_mentioned_p (operands[0], operands[2])
+      ? "mov __tmp_reg__,%2\;sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;inc %B0"
+      : "sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc %2,7\;inc %B0";
+  }
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
 (define_insn "subsi3"
   [(set (match_operand:SI 0 "register_operand"          "=r")
         (minus:SI (match_operand:SI 1 "register_operand" "0")
@@ -1054,6 +1076,41 @@ (define_insn "*subqi3.ashiftrt7"
   [(set_attr "length" "2")
    (set_attr "cc" "clobber")])
 
+(define_insn "*addqi3.lt0"
+  [(set (match_operand:QI 0 "register_operand"                 "=r")
+        (plus:QI (lt:QI (match_operand:QI 1 "register_operand"  "r")
+                        (const_int 0))
+                 (match_operand:QI 2 "register_operand"         "0")))]
+  ""
+  "sbrc %1,7\;inc %0"
+  [(set_attr "length" "2")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*addhi3.lt0"
+  [(set (match_operand:HI 0 "register_operand"                   "=w,r")
+        (plus:HI (lt:HI (match_operand:QI 1 "register_operand"    "r,r")
+                        (const_int 0))
+                 (match_operand:HI 2 "register_operand"           "0,0")))
+   (clobber (match_scratch:QI 3                                  "=X,&1"))]
+  ""
+  "@
+	sbrc %1,7\;adiw %0,1
+	lsl %1\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__"
+  [(set_attr "length" "2,3")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*addsi3.lt0"
+  [(set (match_operand:SI 0 "register_operand"                       "=r")
+        (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "r")
+                              (const_int 31))
+                 (match_operand:SI 2 "register_operand"               "0")))]
+  ""
+  "mov __tmp_reg__,%D1\;lsl __tmp_reg__
+	adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__"
+  [(set_attr "length" "6")
+   (set_attr "cc" "clobber")])
+  
+
 ;; "umulqihi3"
 ;; "mulqihi3"
 (define_insn "<extend_u>mulqihi3"
Index: config/avr/avr.opt
===================================================================
--- config/avr/avr.opt	(revision 180654)
+++ config/avr/avr.opt	(working copy)
@@ -40,6 +40,10 @@ mno-interrupts
 Target Report RejectNegative Mask(NO_INTERRUPTS)
 Change the stack pointer without disabling interrupts
 
+mbranch-cost=
+Target Report Joined RejectNegative UInteger Var(avr_branch_cost) Init(0)
+Set the branch costs for conditional branch instructions.  Reasonable values are small, non-negative integers.  The default branch cost is 0.
+
 morder1
 Target Report Undocumented Mask(ORDER_1)
 
@@ -69,3 +73,7 @@ Accumulate outgoing function arguments a
 mstrict-X
 Target Report Var(avr_strict_X) Init(0)
 When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register.  Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.
+
+mbranch-cost=
+Target Report RejectNegative Joined UInteger Var(avr_branch_cost) Init(0)
+Set the cost of a branch instruction.  Default value is 0.
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 180654)
+++ config/avr/avr.c	(working copy)
@@ -6477,11 +6477,16 @@ avr_rtx_costs_1 (rtx x, int codearg, int
     case UDIV:
     case UMOD:
       if (!speed)
-	*total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
+        *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1);
       else
-	return false;
+        *total = COSTS_N_INSNS (15 * GET_MODE_SIZE (mode));
       *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
-      *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+      /* For div/mod with const-int divisor we have at least the cost of
+         loading the divisor. */
+      if (CONST_INT_P (XEXP (x, 1)))
+        *total += COSTS_N_INSNS (GET_MODE_SIZE (mode));
+      /* Add some overall penaly for clobbering and moving around registers */
+      *total += COSTS_N_INSNS (2);
       return true;
 
     case ROTATE:
Index: config/avr/avr.h
===================================================================
--- config/avr/avr.h	(revision 180654)
+++ config/avr/avr.h	(working copy)
@@ -385,7 +385,7 @@ typedef struct avr_args {
       }                                                                 \
   } while (0)
 
-#define BRANCH_COST(speed_p, predictable_p) 0
+#define BRANCH_COST(speed_p, predictable_p) avr_branch_cost
 
 #define SLOW_BYTE_ACCESS 0
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]