This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[PATCH: RL78] Optimize libgcc routines using clrw and clrb

From: Kaushik Phatak <Kaushik dot Phatak at kpit dot com>
To: "'gcc-patches at gcc dot gnu dot org'" <gcc-patches at gcc dot gnu dot org>
Cc: "nick clifton (nickc at redhat dot com)" <nickc at redhat dot com>
Date: Fri, 5 Feb 2016 12:56:52 +0000
Subject: [PATCH: RL78] Optimize libgcc routines using clrw and clrb
Authentication-results: sourceware.org; auth=none
Authentication-results: gcc.gnu.org; dkim=none (message not signed) header.d=none;gcc.gnu.org; dmarc=none action=none header.from=kpit.com;

Hi,
Please find below a simple patch which optimizes the loading of an immediate value by using the clrw or clrb
instruction when 0x00 is being loaded into a register.
The patch replaces the movw/mov instruction with the smaller clrw/clrb instruction.
The clrw and clrb instructions generate only 1 byte of opcode, compared to 3 or 2 bytes for movw and mov.

With this patch there is a total code size improvement of about 94 bytes in these libgcc routines.

The following routines have improved code size,
___mulsi3   : 2 bytes
___divsi3   : 20 bytes
___modsi3   : 20 bytes
___divhi3   : 10 bytes
___modhi3   : 10 bytes
___parityqi_internal : 2 bytes
__int_cmpsf : 2 bytes
___fixsfsi  : 5 bytes
___fixunssfsi : 2 bytes
___floatsisf  : 6 bytes
_int_unpack_sf : 1 byte
___addsf3 : 5 bytes
__rl78_int_pack_a_r8 : 2 bytes
___mulsf3  : 2 bytes
___divsf3  : 3 bytes
__gcc_bcmp :  2 bytes


I have also attached a draft version of a similar patch (rl78_libgcc_optimize_draft.patch), which goes further and
replaces each movw immediate load to other saddr registers with 2 instructions, e.g.
 START_FUNC ___modhi3
        ;; r8 = 4[sp] % 6[sp]
-       movw    de, #0
+       clrw    ax
+       movw    de,ax
        mov     a, [sp+5]

This patch improves code size by 1 byte for each such substitution; however, it does add an extra clock cycle.

We may consider this patch if we are looking purely for code size improvement, assuming the libraries
are built with -Os. It shows a total improvement of 134 bytes in code size.

Patch1: rl78_libgcc_optimize_clrw.patch - 94 bytes improvement in code size.
Patch2: rl78_libgcc_optimize_draft.patch - 134 bytes improvement in code size.

Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.

Best Regards,
Kaushik

p.s. Kindly ignore any disclaimers at end of this e-mail as they are auto-inserted.
Apologies for the same.

2016-02-05  Kaushik Phatak <kaushik.phatak@kpit.com>

	* config/rl78/bit-count.S: Use clrw/clrb where possible.
	* config/rl78/cmpsi2.S: Likewise.
	* config/rl78/divmodhi.S: Likewise.
	* config/rl78/divmodsi.S: Likewise.
	* config/rl78/fpbit-sf.S: Likewise.
	* config/rl78/fpmath-sf.S: Likewise.
	* config/rl78/mulsi3.S: Likewise.
		
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S	(revision 3174)
+++ libgcc/config/rl78/bit-count.S	(working copy)
@@ -139,7 +139,7 @@
 	xor1	cy, a.5
 	xor1	cy, a.6
 	xor1	cy, a.7
-	movw	ax, #0
+	clrw	ax
 	bnc	$1f
 	incw	ax
 1:
@@ -190,7 +190,7 @@
 	movw	ax, sp
 	addw	ax, #4
 	movw	hl, ax
-	mov	a, #0
+	clrb	a
 1:
 	xch	a, b
 	mov	a, [hl]
@@ -207,7 +207,7 @@
 	bnz	$1b
 
 	mov	x, a
-	mov	a, #0
+	clrb	a
 	movw	r8, ax
 	ret	
 END_FUNC	___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S	(revision 3174)
+++ libgcc/config/rl78/cmpsi2.S	(working copy)
@@ -162,8 +162,8 @@
 
 	;; They differ.  Subtract *S2 from *S1 and return as the result.
 	mov	x, a
-	mov	a, #0
-	mov	r9, #0
+	clrb	a
+	clrb	r9
 	subw	ax, r8
 1:
 	movw	r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S	(revision 3174)
+++ libgcc/config/rl78/divmodhi.S	(working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S	(revision 3174)
+++ libgcc/config/rl78/divmodsi.S	(working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S	(working copy)
@@ -117,7 +117,7 @@
 	call	$!__int_iszero
 	bnz	$2f
 	;; At this point, both args are zero.
-	mov	a, #0
+	clrb	a
 	ret
 
 2:
@@ -151,7 +151,7 @@
 	bc	$ybig_cmpsf	; branch if X < Y
 	bnz	$xbig_cmpsf	; branch if X > Y
 
-	mov	a, #0
+	clrb	a
 	ret
 
 xbig_cmpsf:			; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
 	movw	r10, #0x7fff
 	ret
 	;; -inf
-2:	mov	r8, #0
+2:	clrb	r8
 	mov	r10, #0x8000
 	ret
 	
@@ -302,10 +302,10 @@
 	clr1	a.7
 	call	$!__int_fixunssfsi
 
-	movw	ax, #0
+	clrw	ax
 	subw	ax, r8
 	movw	r8, ax
-	movw	ax, #0
+	clrw	ax
         sknc
         decw    ax
         subw    ax, r10
@@ -410,7 +410,7 @@
 	set1	a.7
 
 	;; Clear B:C:R12:R13
-	movw	bc, #0
+	clrw	bc
 	movw	r12, #0
 
 	;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
 	;; If negative convert to positive ...
 	movw 	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, bc
 	movw	bc, ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, hl
@@ -533,7 +533,7 @@
 	bnz	$1f
 	movw	ax, bc
 	cmpw	ax, #0
-	movw	ax, #0
+	clrw	ax
 	bnz	$1f
 
 	;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S	(working copy)
@@ -87,7 +87,7 @@
 	or	a, #0x80
 	mov	A_FRAC_H, a
 
-	mov	a, #0
+	clrb	a
 	mov	A_FRAC_HH, a
 
 	;; rounding-bit-shift
@@ -273,7 +273,7 @@
 	;; "zero out" b
 	movw	ax, A_EXP
 	movw	B_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	B_FRAC_L, ax
 	movw	B_FRAC_H, ax
 	br	$5f
@@ -281,7 +281,7 @@
 	;; "zero out" a
 	movw	ax, B_EXP
 	movw	A_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	A_FRAC_L, ax
 	movw	A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
 	bt	a.7, $.L706
 	
 	;; subtraction was positive
-	mov	a, #0
+	clrb	a
 	mov	A_SIGN, a
 	br	$.L712
 
@@ -543,7 +543,7 @@
 	or	a, A_FRAC_H
 	or	a, A_FRAC_HH
 	bnz	$1f
-	movw	ax, #0
+	clrw	ax
 	movw	A_EXP, ax
 1:	
 	mov	a, A_FRAC_H
@@ -682,7 +682,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+4], ax
 	movw	[sp+6], ax
 	movw	[sp+12], ax
@@ -867,7 +867,7 @@
 	and	a, #0x80
 	mov	r11, a
 	movw	r8, #0
-	mov	r10, #0
+	clrb	r10
 	ret
 	
 1:	
@@ -930,7 +930,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+0], ax
 	movw	[sp+2], ax
 	movw	[sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S	(revision 3174)
+++ libgcc/config/rl78/mulsi3.S	(working copy)
@@ -148,7 +148,7 @@
 	movw	ax, bc
 
 .Lmul_hisi_top:
-	movw	bc, #0
+	clrw	bc
 
 .Lmul_hisi_loop:
 	shrw	ax, 1

Attachment: rl78_libgcc_optimize_draft.patch
Description: rl78_libgcc_optimize_draft.patch

Attachment: rl78_libgcc_optimize_clrw.patch
Description: rl78_libgcc_optimize_clrw.patch

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]