This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi, Please find below a simple patch which optimizes the loading of an immediate value by using the clrw or clrb instruction when 0x00 is being loaded into a register. The patch replaces the movw/mov instructions with the smaller clrw/clrb instructions. The clrw and clrb instructions generate only 1 byte of opcode, as compared to 3 or 2 bytes for movw and mov. There is a total code size improvement of about 94 bytes with this patch in these libgcc routines. The following routines have improved code size: ___mulsi3 : 2 bytes ___divsi3 : 20 bytes ___modsi3 : 20 bytes ___divhi3 : 10 bytes ___modhi3 : 10 bytes ___parityqi_internal : 2 bytes __int_cmpsf : 2 bytes ___fixsfsi : 5 bytes ___fixunssfsi : 2 bytes ___floatsisf : 6 bytes _int_unpack_sf : 1 byte ___addsf3 : 5 bytes __rl78_int_pack_a_r8 : 2 bytes ___mulsf3 : 2 bytes ___divsf3 : 3 bytes __gcc_bcmp : 2 bytes I have also attached a draft version of a similar patch (rl78_libgcc_optimize_draft.patch), which goes further and removes movw immediate to other saddr registers and replaces them with 2 instructions, i.e. START_FUNC ___modhi3 ;; r8 = 4[sp] % 6[sp] - movw de, #0 + clrw ax + movw de,ax mov a, [sp+5] This patch improves code size by 1 byte for each such substitution; however, it does add an extra clock cycle. We may consider this patch in case we are purely looking for code size improvement, assuming the libraries are built with -Os. This shows a total of 134 bytes improvement in code size. Patch1: rl78_libgcc_optimize_clrw.patch - 94 bytes improvement in code size. Patch2: rl78_libgcc_optimize_draft.patch - 134 bytes improvement in code size. Kindly review this patch and let me know what you think. This is regression tested for rl78 -msim. Best Regards, Kaushik p.s. Kindly ignore any disclaimers at the end of this e-mail as they are auto-inserted. Apologies for the same. 2016-02-05 Kaushik Phatak <kaushik.phatak@kpit.com> * config/rl78/bit-count.S: Use clrw/clrb where possible. * config/rl78/cmpsi2.S: Likewise. 
* config/rl78/divmodhi.S: Likewise. * config/rl78/divmodsi.S: Likewise. * config/rl78/fpbit-sf.S: Likewise. * config/rl78/fpmath-sf.S: Likewise. * config/rl78/mulsi3.S: Likewise. Index: libgcc/config/rl78/bit-count.S =================================================================== --- libgcc/config/rl78/bit-count.S (revision 3174) +++ libgcc/config/rl78/bit-count.S (working copy) @@ -139,7 +139,7 @@ xor1 cy, a.5 xor1 cy, a.6 xor1 cy, a.7 - movw ax, #0 + clrw ax bnc $1f incw ax 1: @@ -190,7 +190,7 @@ movw ax, sp addw ax, #4 movw hl, ax - mov a, #0 + clrb a 1: xch a, b mov a, [hl] @@ -207,7 +207,7 @@ bnz $1b mov x, a - mov a, #0 + clrb a movw r8, ax ret END_FUNC ___popcountqi_internal Index: libgcc/config/rl78/cmpsi2.S =================================================================== --- libgcc/config/rl78/cmpsi2.S (revision 3174) +++ libgcc/config/rl78/cmpsi2.S (working copy) @@ -162,8 +162,8 @@ ;; They differ. Subtract *S2 from *S1 and return as the result. mov x, a - mov a, #0 - mov r9, #0 + clrb a + clrb r9 subw ax, r8 1: movw r8, ax Index: libgcc/config/rl78/divmodhi.S =================================================================== --- libgcc/config/rl78/divmodhi.S (revision 3174) +++ libgcc/config/rl78/divmodhi.S (working copy) @@ -576,7 +576,7 @@ .macro NEG_AX movw hl, ax - movw ax, #0 + clrw ax subw ax, [hl] movw [hl], ax .endm Index: libgcc/config/rl78/divmodsi.S =================================================================== --- libgcc/config/rl78/divmodsi.S (revision 3174) +++ libgcc/config/rl78/divmodsi.S (working copy) @@ -952,10 +952,10 @@ .macro NEG_AX movw hl, ax - movw ax, #0 + clrw ax subw ax, [hl] movw [hl], ax - movw ax, #0 + clrw ax sknc decw ax subw ax, [hl+2] Index: libgcc/config/rl78/fpbit-sf.S =================================================================== --- libgcc/config/rl78/fpbit-sf.S (revision 3174) +++ libgcc/config/rl78/fpbit-sf.S (working copy) @@ -117,7 +117,7 @@ call $!__int_iszero bnz $2f ;; At this point, both args 
are zero. - mov a, #0 + clrb a ret 2: @@ -151,7 +151,7 @@ bc $ybig_cmpsf ; branch if X < Y bnz $xbig_cmpsf ; branch if X > Y - mov a, #0 + clrb a ret xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg @@ -285,7 +285,7 @@ movw r10, #0x7fff ret ;; -inf -2: mov r8, #0 +2: clrb r8 mov r10, #0x8000 ret @@ -302,10 +302,10 @@ clr1 a.7 call $!__int_fixunssfsi - movw ax, #0 + clrw ax subw ax, r8 movw r8, ax - movw ax, #0 + clrw ax sknc decw ax subw ax, r10 @@ -410,7 +410,7 @@ set1 a.7 ;; Clear B:C:R12:R13 - movw bc, #0 + clrw bc movw r12, #0 ;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13), @@ -482,10 +482,10 @@ ;; If negative convert to positive ... movw hl, ax - movw ax, #0 + clrw ax subw ax, bc movw bc, ax - movw ax, #0 + clrw ax sknc decw ax subw ax, hl @@ -533,7 +533,7 @@ bnz $1f movw ax, bc cmpw ax, #0 - movw ax, #0 + clrw ax bnz $1f ;; Return 0.0 Index: libgcc/config/rl78/fpmath-sf.S =================================================================== --- libgcc/config/rl78/fpmath-sf.S (revision 3174) +++ libgcc/config/rl78/fpmath-sf.S (working copy) @@ -87,7 +87,7 @@ or a, #0x80 mov A_FRAC_H, a - mov a, #0 + clrb a mov A_FRAC_HH, a ;; rounding-bit-shift @@ -273,7 +273,7 @@ ;; "zero out" b movw ax, A_EXP movw B_EXP, ax - movw ax, #0 + clrw ax movw B_FRAC_L, ax movw B_FRAC_H, ax br $5f @@ -281,7 +281,7 @@ ;; "zero out" a movw ax, B_EXP movw A_EXP, ax - movw ax, #0 + clrw ax movw A_FRAC_L, ax movw A_FRAC_H, ax @@ -379,7 +379,7 @@ bt a.7, $.L706 ;; subtraction was positive - mov a, #0 + clrb a mov A_SIGN, a br $.L712 @@ -543,7 +543,7 @@ or a, A_FRAC_H or a, A_FRAC_HH bnz $1f - movw ax, #0 + clrw ax movw A_EXP, ax 1: mov a, A_FRAC_H @@ -682,7 +682,7 @@ movw ax, B_FRAC_H movw [sp+10], ax - movw ax, #0 + clrw ax movw [sp+4], ax movw [sp+6], ax movw [sp+12], ax @@ -867,7 +867,7 @@ and a, #0x80 mov r11, a movw r8, #0 - mov r10, #0 + clrb r10 ret 1: @@ -930,7 +930,7 @@ movw ax, B_FRAC_H movw [sp+10], ax - movw ax, #0 + clrw ax movw [sp+0], ax movw [sp+2], 
ax movw [sp+12], ax Index: libgcc/config/rl78/mulsi3.S =================================================================== --- libgcc/config/rl78/mulsi3.S (revision 3174) +++ libgcc/config/rl78/mulsi3.S (working copy) @@ -148,7 +148,7 @@ movw ax, bc .Lmul_hisi_top: - movw bc, #0 + clrw bc .Lmul_hisi_loop: shrw ax, 1
Attachment:
rl78_libgcc_optimize_draft.patch
Description: rl78_libgcc_optimize_draft.patch
Attachment:
rl78_libgcc_optimize_clrw.patch
Description: rl78_libgcc_optimize_clrw.patch
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |