[PATCH: RL78] Optimize libgcc routines using clrw and clrb
Kaushik Phatak
Kaushik.Phatak@kpit.com
Tue Apr 5 08:08:00 GMT 2016
Hi,
Please find below a patch that optimizes libgcc routines for the RL78 target.
This is similar to my earlier patch submitted here,
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00415.html
The patch optimizes loading of the immediate value 0x00 by using the clrw or clrb instruction instead.
The patch replaces movw/mov instruction with the smaller clrw/clrb instruction.
The clrw and clrb instructions generate only 1 byte of opcode, as compared to 3 and 2 bytes for movw and mov respectively.
Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.
Best Regards,
Kaushik
p.s. Kindly ignore any disclaimers at end of this e-mail as they are auto-inserted.
Apologies for the same.
2016-04-06 Kaushik Phatak <kaushik.phatak@kpit.com>
* config/rl78/bit-count.S: Use clrw/clrb where possible.
* config/rl78/cmpsi2.S: Likewise.
* config/rl78/divmodhi.S: Likewise.
* config/rl78/divmodsi.S: Likewise.
* config/rl78/fpbit-sf.S: Likewise.
* config/rl78/fpmath-sf.S: Likewise.
* config/rl78/mulsi3.S Likewise.
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S (revision 3174)
+++ libgcc/config/rl78/bit-count.S (working copy)
@@ -139,7 +139,7 @@
xor1 cy, a.5
xor1 cy, a.6
xor1 cy, a.7
- movw ax, #0
+ clrw ax
bnc $1f
incw ax
1:
@@ -190,7 +190,7 @@
movw ax, sp
addw ax, #4
movw hl, ax
- mov a, #0
+ clrb a
1:
xch a, b
mov a, [hl]
@@ -207,7 +207,7 @@
bnz $1b
mov x, a
- mov a, #0
+ clrb a
movw r8, ax
ret
END_FUNC ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
;; They differ. Subtract *S2 from *S1 and return as the result.
mov x, a
- mov a, #0
- mov r9, #0
+ clrb a
+ clrb r9
subw ax, r8
1:
movw r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S (revision 3174)
+++ libgcc/config/rl78/divmodhi.S (working copy)
@@ -576,7 +576,7 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
.endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S (revision 3174)
+++ libgcc/config/rl78/divmodsi.S (working copy)
@@ -952,10 +952,10 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S (working copy)
@@ -117,7 +117,7 @@
call $!__int_iszero
bnz $2f
;; At this point, both args are zero.
- mov a, #0
+ clrb a
ret
2:
@@ -151,7 +151,7 @@
bc $ybig_cmpsf ; branch if X < Y
bnz $xbig_cmpsf ; branch if X > Y
- mov a, #0
+ clrb a
ret
xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
movw r10, #0x7fff
ret
;; -inf
-2: mov r8, #0
+2: clrb r8
mov r10, #0x8000
ret
@@ -302,10 +302,10 @@
clr1 a.7
call $!__int_fixunssfsi
- movw ax, #0
+ clrw ax
subw ax, r8
movw r8, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, r10
@@ -410,7 +410,7 @@
set1 a.7
;; Clear B:C:R12:R13
- movw bc, #0
+ clrw bc
movw r12, #0
;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
;; If negative convert to positive ...
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, bc
movw bc, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, hl
@@ -533,7 +533,7 @@
bnz $1f
movw ax, bc
cmpw ax, #0
- movw ax, #0
+ clrw ax
bnz $1f
;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S (working copy)
@@ -87,7 +87,7 @@
or a, #0x80
mov A_FRAC_H, a
- mov a, #0
+ clrb a
mov A_FRAC_HH, a
;; rounding-bit-shift
@@ -273,7 +273,7 @@
;; "zero out" b
movw ax, A_EXP
movw B_EXP, ax
- movw ax, #0
+ clrw ax
movw B_FRAC_L, ax
movw B_FRAC_H, ax
br $5f
@@ -281,7 +281,7 @@
;; "zero out" a
movw ax, B_EXP
movw A_EXP, ax
- movw ax, #0
+ clrw ax
movw A_FRAC_L, ax
movw A_FRAC_H, ax
@@ -379,7 +379,7 @@
bt a.7, $.L706
;; subtraction was positive
- mov a, #0
+ clrb a
mov A_SIGN, a
br $.L712
@@ -543,7 +543,7 @@
or a, A_FRAC_H
or a, A_FRAC_HH
bnz $1f
- movw ax, #0
+ clrw ax
movw A_EXP, ax
1:
mov a, A_FRAC_H
@@ -682,7 +682,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+4], ax
movw [sp+6], ax
movw [sp+12], ax
@@ -867,7 +867,7 @@
and a, #0x80
mov r11, a
movw r8, #0
- mov r10, #0
+ clrb r10
ret
1:
@@ -930,7 +930,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+0], ax
movw [sp+2], ax
movw [sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
movw ax, bc
.Lmul_hisi_top:
- movw bc, #0
+ clrw bc
.Lmul_hisi_loop:
shrw ax, 1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: rl78_libgcc_optimize_clrw.patch
Type: application/octet-stream
Size: 4440 bytes
Desc: rl78_libgcc_optimize_clrw.patch
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20160405/674a99a9/attachment.obj>
More information about the Gcc-patches
mailing list