This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
ARM: lib1funcs.asm tidy up
- To: gcc-patches at gcc dot gnu dot org
- Subject: ARM: lib1funcs.asm tidy up
- From: Nick Clifton <nickc at redhat dot com>
- Date: Tue, 22 Aug 2000 12:49:09 -0700
Hi Guys,
Well, as promised, here is the patch I am about to apply to tidy up
the code in the ARM port's lib1funcs.asm file. This patch replaces
duplicated code sequences with assembler macros, so that the code is
defined only once, in the definition of the macro.
I have tested the patched file with divmod-1.c and arith-rand.c from
the GCC testsuite (thanks Jeff!) and there are no failures.
Cheers
Nick
2000-08-22 Nick Clifton <nickc@redhat.com>
* config/arm/lib1funcs.asm (ARM_DIV_MOD_BODY): New macro.
Common code for ARM divide and modulus functions.
(THUMB_DIV_MOD_BODY): New macro. Thumb equivalent of
ARM_DIV_MOD_BODY.
(FUNC_END): New macro. Common code at the end of the division
and modulo functions.
(THUMB_FUNC_START): New macro. Common code at the start of
Thumb functions.
(__divsi3, __udivsi3, __modsi3, __umodsi3): Use new macros.
Index: config/arm/lib1funcs.asm
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/arm/lib1funcs.asm,v
retrieving revision 1.13
diff -p -w -r1.13 lib1funcs.asm
*** lib1funcs.asm 2000/08/22 19:37:02 1.13
--- lib1funcs.asm 2000/08/22 19:39:03
*************** along with this program; see the file CO
*** 27,32 ****
--- 27,35 ----
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* ------------------------------------------------------------------------ */
+
+ /* We need to know what prefix to add to function names. */
+
#ifndef __USER_LABEL_PREFIX__
#error __USER_LABEL_PREFIX__ not defined
#endif
*************** Boston, MA 02111-1307, USA. */
*** 55,60 ****
--- 58,64 ----
#endif
/* Function end macros. Variants for 26 bit APCS and interworking. */
+
#ifdef __APCS_26__
# define RET movs pc, lr
# define RETc(x) mov##x##s pc, lr
*************** Ldiv0:
*** 71,76 ****
--- 75,81 ----
# define RET bx lr
# define RETc(x) bx##x lr
.macro THUMB_LDIV0
+ Ldiv0:
push { lr }
bl SYM (__div0)
mov r0, #0 @ About as wrong as it could be.
*************** Ldiv0:
*** 78,83 ****
--- 83,89 ----
bx r1
.endm
.macro ARM_LDIV0
+ Ldiv0:
str lr, [sp, #-4]!
bl SYM (__div0) __PLT__
mov r0, #0 @ About as wrong as it could be.
*************** Ldiv0:
*** 88,99 ****
--- 94,107 ----
# define RET mov pc, lr
# define RETc(x) mov##x pc, lr
.macro THUMB_LDIV0
+ Ldiv0:
push { lr }
bl SYM (__div0)
mov r0, #0 @ About as wrong as it could be.
pop { pc }
.endm
.macro ARM_LDIV0
+ Ldiv0:
str lr, [sp, #-4]!
bl SYM (__div0) __PLT__
mov r0, #0 @ About as wrong as it could be.
*************** Ldiv0:
*** 103,109 ****
--- 111,136 ----
# define RETCOND
#endif
+ .macro FUNC_END name
+ Ldiv0:
#ifdef __thumb__
+ THUMB_LDIV0
+ #else
+ ARM_LDIV0
+ #endif
+ SIZE (__\name)
+ .endm
+
+ .macro THUMB_FUNC_START name
+ .globl SYM (\name)
+ TYPE (\name)
+ .thumb_func
+ SYM (\name):
+ .endm
+
+ /* Function start macros. Variants for ARM and Thumb. */
+
+ #ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
*************** Ldiv0:
*** 111,117 ****
#define THUMB_CODE
#endif
-
.macro FUNC_START name
.text
.globl SYM (__\name)
--- 138,143 ----
*************** Ldiv0:
*** 122,328 ****
SYM (__\name):
.endm
! .macro FUNC_END name
! Ldiv0:
! #ifdef __thumb__
! THUMB_LDIV0
! #else
! ARM_LDIV0
! #endif
! SIZE (__\name)
! .endm
- .macro THUMB_FUNC_START name
- .globl SYM (\name)
- TYPE (\name)
- .thumb_func
- SYM (\name):
- .endm
-
- /* Used for Thumb code. */
work .req r4 @ XXXX is this safe ?
-
- /* ------------------------------------------------------------------------ */
- #ifdef L_udivsi3
-
dividend .req r0
divisor .req r1
result .req r2
curbit .req r3
ip .req r12
sp .req r13
lr .req r14
pc .req r15
-
- FUNC_START udivsi3
-
- #ifdef __thumb__
-
- cmp divisor, #0
- beq Ldiv0
- mov curbit, #1
- mov result, #0
-
- push { work }
- cmp dividend, divisor
- bcc Lgot_result
-
- @ Load the constant 0x10000000 into our work register
- mov work, #1
- lsl work, #28
- Loop1:
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
- cmp divisor, work
- bcs Lbignum
- cmp divisor, dividend
- bcs Lbignum
- lsl divisor, #4
- lsl curbit, #4
- b Loop1
! Lbignum:
! @ Set work to 0x80000000
! lsl work, #3
! Loop2:
! @ For very big divisors, we must shift it a bit at a time, or
! @ we will be in danger of overflowing.
! cmp divisor, work
! bcs Loop3
! cmp divisor, dividend
! bcs Loop3
! lsl divisor, #1
! lsl curbit, #1
! b Loop2
!
! Loop3:
! @ Test for possible subtractions, and note which bits
! @ are done in the result. On the final pass, this may subtract
! @ too much from the dividend, but the result will be ok, since the
! @ "bit" will have been shifted out at the bottom.
! cmp dividend, divisor
! bcc Over1
! sub dividend, dividend, divisor
! orr result, result, curbit
! Over1:
! lsr work, divisor, #1
! cmp dividend, work
! bcc Over2
! sub dividend, dividend, work
! lsr work, curbit, #1
! orr result, work
! Over2:
! lsr work, divisor, #2
! cmp dividend, work
! bcc Over3
! sub dividend, dividend, work
! lsr work, curbit, #2
! orr result, work
! Over3:
! lsr work, divisor, #3
! cmp dividend, work
! bcc Over4
! sub dividend, dividend, work
! lsr work, curbit, #3
! orr result, work
! Over4:
! cmp dividend, #0 @ Early termination?
! beq Lgot_result
! lsr curbit, #4 @ No, any more bits to do?
! beq Lgot_result
! lsr divisor, #4
! b Loop3
! Lgot_result:
! mov r0, result
! pop { work }
! RET
!
! #else /* ARM version. */
!
! cmp divisor, #0
! beq Ldiv0
! mov curbit, #1
! mov result, #0
! cmp dividend, divisor
! bcc Lgot_result
Loop1:
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
cmp divisor, #0x10000000
! cmpcc divisor, dividend
! movcc divisor, divisor, lsl #4
! movcc curbit, curbit, lsl #4
! bcc Loop1
Lbignum:
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
cmp divisor, #0x80000000
! cmpcc divisor, dividend
! movcc divisor, divisor, lsl #1
! movcc curbit, curbit, lsl #1
! bcc Lbignum
Loop3:
! @ Test for possible subtractions, and note which bits
! @ are done in the result. On the final pass, this may subtract
! @ too much from the dividend, but the result will be ok, since the
! @ "bit" will have been shifted out at the bottom.
cmp dividend, divisor
! subcs dividend, dividend, divisor
! orrcs result, result, curbit
cmp dividend, divisor, lsr #1
! subcs dividend, dividend, divisor, lsr #1
! orrcs result, result, curbit, lsr #1
cmp dividend, divisor, lsr #2
! subcs dividend, dividend, divisor, lsr #2
! orrcs result, result, curbit, lsr #2
cmp dividend, divisor, lsr #3
! subcs dividend, dividend, divisor, lsr #3
! orrcs result, result, curbit, lsr #3
! cmp dividend, #0 @ Early termination?
! movnes curbit, curbit, lsr #4 @ No, any more bits to do?
! movne divisor, divisor, lsr #4
! bne Loop3
! Lgot_result:
! mov r0, result
! RET
! #endif /* ARM version */
! FUNC_END udivsi3
! #endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
! #ifdef L_umodsi3
!
! dividend .req r0
! divisor .req r1
! overdone .req r2
! curbit .req r3
! ip .req r12
! sp .req r13
! lr .req r14
! pc .req r15
!
! FUNC_START umodsi3
!
! #ifdef __thumb__
!
! cmp divisor, #0
! beq Ldiv0
! mov curbit, #1
! cmp dividend, divisor
! bcs Over1
! RET
!
! Over1:
! @ Load the constant 0x10000000 into our work register
! push { work }
mov work, #1
lsl work, #28
Loop1:
--- 148,260 ----
SYM (__\name):
.endm
! /* Register aliases. */
work .req r4 @ XXXX is this safe ?
dividend .req r0
divisor .req r1
+ overdone .req r2
result .req r2
curbit .req r3
ip .req r12
sp .req r13
lr .req r14
pc .req r15
! /* ------------------------------------------------------------------------ */
! /* Bodies of the division and modulo routines. */
! /* ------------------------------------------------------------------------ */
! .macro ARM_DIV_MOD_BODY modulo
Loop1:
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
cmp divisor, #0x10000000
! cmpLO divisor, dividend
! movLO divisor, divisor, lsl #4
! movLO curbit, curbit, lsl #4
! bLO Loop1
Lbignum:
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
cmp divisor, #0x80000000
! cmpLO divisor, dividend
! movLO divisor, divisor, lsl #1
! movLO curbit, curbit, lsl #1
! bLO Lbignum
Loop3:
! @ Test for possible subtractions. On the final pass, this may
! @ subtract too much from the dividend ...
!
! .if \modulo
! @ ... so keep track of which subtractions are done in OVERDONE.
! @ We can fix them up afterwards.
! mov overdone, #0
cmp dividend, divisor
! subHS dividend, dividend, divisor
cmp dividend, divisor, lsr #1
! subHS dividend, dividend, divisor, lsr #1
! orrHS overdone, overdone, curbit, ror #1
cmp dividend, divisor, lsr #2
! subHS dividend, dividend, divisor, lsr #2
! orrHS overdone, overdone, curbit, ror #2
cmp dividend, divisor, lsr #3
! subHS dividend, dividend, divisor, lsr #3
! orrHS overdone, overdone, curbit, ror #3
! mov ip, curbit
! .else
! @ ... so keep track of which subtractions are done in RESULT.
! @ The result will be ok, since the "bit" will have been
! @ shifted out at the bottom.
! cmp dividend, divisor
! subHS dividend, dividend, divisor
! orrHS result, result, curbit
! cmp dividend, divisor, lsr #1
! subHS dividend, dividend, divisor, lsr #1
! orrHS result, result, curbit, lsr #1
! cmp dividend, divisor, lsr #2
! subHS dividend, dividend, divisor, lsr #2
! orrHS result, result, curbit, lsr #2
! cmp dividend, divisor, lsr #3
! subHS dividend, dividend, divisor, lsr #3
! orrHS result, result, curbit, lsr #3
! .endif
! cmp dividend, #0 @ Early termination?
! movNEs curbit, curbit, lsr #4 @ No, any more bits to do?
! movNE divisor, divisor, lsr #4
! bNE Loop3
! .if \modulo
! Lfixup_dividend:
! @ Any subtractions that we should not have done will be recorded in
! @ the top three bits of OVERDONE. Exactly which were not needed
! @ are governed by the position of the bit, stored in IP.
! ands overdone, overdone, #0xe0000000
! @ If we terminated early, because dividend became zero, then the
! @ bit in ip will not be in the bottom nibble, and we should not
! @ perform the additions below. We must test for this though
! @ (rather relying upon the TSTs to prevent the additions) since
! @ the bit in ip could be in the top two bits which might then match
! @ with one of the smaller RORs.
! tstNE ip, #0x7
! bEQ Lgot_result
! tst overdone, ip, ror #3
! addNE dividend, dividend, divisor, lsr #3
! tst overdone, ip, ror #2
! addNE dividend, dividend, divisor, lsr #2
! tst overdone, ip, ror #1
! addNE dividend, dividend, divisor, lsr #1
! .endif
! Lgot_result:
! .endm
/* ------------------------------------------------------------------------ */
! .macro THUMB_DIV_MOD_BODY modulo
! @ Load the constant 0x10000000 into our work register.
mov work, #1
lsl work, #28
Loop1:
*************** Loop1:
*** 331,339 ****
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
cmp divisor, work
! bcs Lbignum
cmp divisor, dividend
! bcs Lbignum
lsl divisor, #4
lsl curbit, #4
b Loop1
--- 263,271 ----
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
cmp divisor, work
! bHS Lbignum
cmp divisor, dividend
! bHS Lbignum
lsl divisor, #4
lsl curbit, #4
b Loop1
*************** Loop2:
*** 344,411 ****
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
cmp divisor, work
! bcs Loop3
cmp divisor, dividend
! bcs Loop3
lsl divisor, #1
lsl curbit, #1
b Loop2
Loop3:
! @ Test for possible subtractions. On the final pass, this may
! @ subtract too much from the dividend, so keep track of which
! @ subtractions are done, we can fix them up afterwards...
mov overdone, #0
cmp dividend, divisor
! bcc Over2
sub dividend, dividend, divisor
! Over2:
lsr work, divisor, #1
cmp dividend, work
! bcc Over3
sub dividend, dividend, work
mov ip, curbit
mov work, #1
ror curbit, work
orr overdone, curbit
mov curbit, ip
! Over3:
lsr work, divisor, #2
cmp dividend, work
! bcc Over4
sub dividend, dividend, work
mov ip, curbit
mov work, #2
ror curbit, work
orr overdone, curbit
mov curbit, ip
! Over4:
lsr work, divisor, #3
cmp dividend, work
! bcc Over5
sub dividend, dividend, work
mov ip, curbit
mov work, #3
ror curbit, work
orr overdone, curbit
mov curbit, ip
! Over5:
mov ip, curbit
cmp dividend, #0 @ Early termination?
! beq Over6
lsr curbit, #4 @ No, any more bits to do?
! beq Over6
lsr divisor, #4
b Loop3
! Over6:
@ Any subtractions that we should not have done will be recorded in
@ the top three bits of "overdone". Exactly which were not needed
@ are governed by the position of the bit, stored in ip.
mov work, #0xe
lsl work, #28
and overdone, work
! bne Over7
! pop { work }
! RET @ No fixups needed
@ If we terminated early, because dividend became zero, then the
@ bit in ip will not be in the bottom nibble, and we should not
--- 276,376 ----
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
cmp divisor, work
! bHS Loop3
cmp divisor, dividend
! bHS Loop3
lsl divisor, #1
lsl curbit, #1
b Loop2
Loop3:
! @ Test for possible subtractions ...
! .if \modulo
! @ ... On the final pass, this may subtract too much from the dividend,
! @ so keep track of which subtractions are done, we can fix them up
! @ afterwards.
mov overdone, #0
cmp dividend, divisor
! bLO Lover1
sub dividend, dividend, divisor
! Lover1:
lsr work, divisor, #1
cmp dividend, work
! bLO Lover2
sub dividend, dividend, work
mov ip, curbit
mov work, #1
ror curbit, work
orr overdone, curbit
mov curbit, ip
! Lover2:
lsr work, divisor, #2
cmp dividend, work
! bLO Lover3
sub dividend, dividend, work
mov ip, curbit
mov work, #2
ror curbit, work
orr overdone, curbit
mov curbit, ip
! Lover3:
lsr work, divisor, #3
cmp dividend, work
! bLO Lover4
sub dividend, dividend, work
mov ip, curbit
mov work, #3
ror curbit, work
orr overdone, curbit
mov curbit, ip
! Lover4:
mov ip, curbit
+ .else
+ @ ... and note which bits are done in the result. On the final pass,
+ @ this may subtract too much from the dividend, but the result will be ok,
+ @ since the "bit" will have been shifted out at the bottom.
+ cmp dividend, divisor
+ bLO Lover1
+ sub dividend, dividend, divisor
+ orr result, result, curbit
+ Lover1:
+ lsr work, divisor, #1
+ cmp dividend, work
+ bLO Lover2
+ sub dividend, dividend, work
+ lsr work, curbit, #1
+ orr result, work
+ Lover2:
+ lsr work, divisor, #2
+ cmp dividend, work
+ bLO Lover3
+ sub dividend, dividend, work
+ lsr work, curbit, #2
+ orr result, work
+ Lover3:
+ lsr work, divisor, #3
+ cmp dividend, work
+ bLO Lover4
+ sub dividend, dividend, work
+ lsr work, curbit, #3
+ orr result, work
+ Lover4:
+ .endif
+
cmp dividend, #0 @ Early termination?
! bEQ Lover5
lsr curbit, #4 @ No, any more bits to do?
! bEQ Lover5
lsr divisor, #4
b Loop3
! Lover5:
! .if \modulo
@ Any subtractions that we should not have done will be recorded in
@ the top three bits of "overdone". Exactly which were not needed
@ are governed by the position of the bit, stored in ip.
mov work, #0xe
lsl work, #28
and overdone, work
! bEQ Lgot_result
@ If we terminated early, because dividend became zero, then the
@ bit in ip will not be in the bottom nibble, and we should not
*************** Over6:
*** 416,520 ****
mov curbit, ip
mov work, #0x7
tst curbit, work
! beq Over10
- Over7:
mov curbit, ip
mov work, #3
ror curbit, work
tst overdone, curbit
! beq Over8
lsr work, divisor, #3
! add dividend, dividend, work
! Over8:
mov curbit, ip
mov work, #2
ror curbit, work
tst overdone, curbit
! beq Over9
lsr work, divisor, #2
! add dividend, dividend, work
! Over9:
mov curbit, ip
mov work, #1
ror curbit, work
tst overdone, curbit
! beq Over10
lsr work, divisor, #1
! add dividend, dividend, work
! Over10:
pop { work }
RET
#else /* ARM version. */
cmp divisor, #0
! beq Ldiv0
mov curbit, #1
cmp dividend, divisor
! RETc(cc)
! Loop1:
! @ Unless the divisor is very big, shift it up in multiples of
! @ four bits, since this is the amount of unwinding in the main
! @ division loop. Continue shifting until the divisor is
! @ larger than the dividend.
! cmp divisor, #0x10000000
! cmpcc divisor, dividend
! movcc divisor, divisor, lsl #4
! movcc curbit, curbit, lsl #4
! bcc Loop1
! Lbignum:
! @ For very big divisors, we must shift it a bit at a time, or
! @ we will be in danger of overflowing.
! cmp divisor, #0x80000000
! cmpcc divisor, dividend
! movcc divisor, divisor, lsl #1
! movcc curbit, curbit, lsl #1
! bcc Lbignum
! Loop3:
! @ Test for possible subtractions. On the final pass, this may
! @ subtract too much from the dividend, so keep track of which
! @ subtractions are done, we can fix them up afterwards...
! mov overdone, #0
cmp dividend, divisor
! subcs dividend, dividend, divisor
! cmp dividend, divisor, lsr #1
! subcs dividend, dividend, divisor, lsr #1
! orrcs overdone, overdone, curbit, ror #1
! cmp dividend, divisor, lsr #2
! subcs dividend, dividend, divisor, lsr #2
! orrcs overdone, overdone, curbit, ror #2
! cmp dividend, divisor, lsr #3
! subcs dividend, dividend, divisor, lsr #3
! orrcs overdone, overdone, curbit, ror #3
! mov ip, curbit
! cmp dividend, #0 @ Early termination?
! movnes curbit, curbit, lsr #4 @ No, any more bits to do?
! movne divisor, divisor, lsr #4
! bne Loop3
! @ Any subtractions that we should not have done will be recorded in
! @ the top three bits of "overdone". Exactly which were not needed
! @ are governed by the position of the bit, stored in ip.
! ands overdone, overdone, #0xe0000000
! @ If we terminated early, because dividend became zero, then the
! @ bit in ip will not be in the bottom nibble, and we should not
! @ perform the additions below. We must test for this though
! @ (rather relying upon the TSTs to prevent the additions) since
! @ the bit in ip could be in the top two bits which might then match
! @ with one of the smaller RORs.
! tstNE ip, #0x7
! RETc(eq) @ No fixups needed
! tst overdone, ip, ror #3
! addne dividend, dividend, divisor, lsr #3
! tst overdone, ip, ror #2
! addne dividend, dividend, divisor, lsr #2
! tst overdone, ip, ror #1
! addne dividend, dividend, divisor, lsr #1
RET
#endif /* ARM version. */
FUNC_END umodsi3
--- 381,493 ----
mov curbit, ip
mov work, #0x7
tst curbit, work
! bEQ Lgot_result
mov curbit, ip
mov work, #3
ror curbit, work
tst overdone, curbit
! bEQ Lover6
lsr work, divisor, #3
! add dividend, work
! Lover6:
mov curbit, ip
mov work, #2
ror curbit, work
tst overdone, curbit
! bEQ Lover7
lsr work, divisor, #2
! add dividend, work
! Lover7:
mov curbit, ip
mov work, #1
ror curbit, work
tst overdone, curbit
! bEQ Lgot_result
lsr work, divisor, #1
! add dividend, work
! .endif
! Lgot_result:
! .endm
! /* ------------------------------------------------------------------------ */
! /* Start of the Real Functions */
! /* ------------------------------------------------------------------------ */
! #ifdef L_udivsi3
!
! FUNC_START udivsi3
!
! #ifdef __thumb__
!
! cmp divisor, #0
! bEQ Ldiv0
! mov curbit, #1
! mov result, #0
!
! push { work }
! cmp dividend, divisor
! bLO Lgot_result
!
! THUMB_DIV_MOD_BODY 0
!
! mov r0, result
pop { work }
RET
#else /* ARM version. */
cmp divisor, #0
! bEQ Ldiv0
mov curbit, #1
+ mov result, #0
cmp dividend, divisor
! bLO Lgot_result
! ARM_DIV_MOD_BODY 0
! mov r0, result
! RET
!
! #endif /* ARM version */
!
! FUNC_END udivsi3
!
! #endif /* L_udivsi3 */
! /* ------------------------------------------------------------------------ */
! #ifdef L_umodsi3
!
! FUNC_START umodsi3
!
! #ifdef __thumb__
!
! cmp divisor, #0
! bEQ Ldiv0
! mov curbit, #1
cmp dividend, divisor
! bHS Lover10
! RET
! Lover10:
! push { work }
!
! THUMB_DIV_MOD_BODY 1
!
! pop { work }
RET
+ #else /* ARM version. */
+
+ cmp divisor, #0
+ bEQ Ldiv0
+ cmp divisor, #1
+ cmpNE dividend, divisor
+ movEQ dividend, #0
+ RETc(LO)
+ mov curbit, #1
+
+ ARM_DIV_MOD_BODY 1
+
+ RET
+
#endif /* ARM version. */
FUNC_END umodsi3
*************** Loop3:
*** 523,542 ****
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3
- dividend .req r0
- divisor .req r1
- result .req r2
- curbit .req r3
- ip .req r12
- sp .req r13
- lr .req r14
- pc .req r15
-
FUNC_START divsi3
#ifdef __thumb__
cmp divisor, #0
! beq Ldiv0
push { work }
mov work, dividend
--- 496,506 ----
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3
FUNC_START divsi3
#ifdef __thumb__
cmp divisor, #0
! bEQ Ldiv0
push { work }
mov work, dividend
*************** pc .req r15
*** 545,633 ****
mov curbit, #1
mov result, #0
cmp divisor, #0
! bpl Over1
neg divisor, divisor @ Loops below use unsigned.
! Over1:
cmp dividend, #0
! bpl Over2
neg dividend, dividend
! Over2:
cmp dividend, divisor
! bcc Lgot_result
!
! mov work, #1
! lsl work, #28
! Loop1:
! @ Unless the divisor is very big, shift it up in multiples of
! @ four bits, since this is the amount of unwinding in the main
! @ division loop. Continue shifting until the divisor is
! @ larger than the dividend.
! cmp divisor, work
! Bcs Lbignum
! cmp divisor, dividend
! Bcs Lbignum
! lsl divisor, #4
! lsl curbit, #4
! b Loop1
!
! Lbignum:
! @ For very big divisors, we must shift it a bit at a time, or
! @ we will be in danger of overflowing.
! lsl work, #3
! Loop2:
! cmp divisor, work
! Bcs Loop3
! cmp divisor, dividend
! Bcs Loop3
! lsl divisor, #1
! lsl curbit, #1
! b Loop2
! Loop3:
! @ Test for possible subtractions, and note which bits
! @ are done in the result. On the final pass, this may subtract
! @ too much from the dividend, but the result will be ok, since the
! @ "bit" will have been shifted out at the bottom.
! cmp dividend, divisor
! Bcc Over3
! sub dividend, dividend, divisor
! orr result, result, curbit
! Over3:
! lsr work, divisor, #1
! cmp dividend, work
! Bcc Over4
! sub dividend, dividend, work
! lsr work, curbit, #1
! orr result, work
! Over4:
! lsr work, divisor, #2
! cmp dividend, work
! Bcc Over5
! sub dividend, dividend, work
! lsr work, curbit, #2
! orr result, result, work
! Over5:
! lsr work, divisor, #3
! cmp dividend, work
! Bcc Over6
! sub dividend, dividend, work
! lsr work, curbit, #3
! orr result, result, work
! Over6:
! cmp dividend, #0 @ Early termination?
! Beq Lgot_result
! lsr curbit, #4 @ No, any more bits to do?
! Beq Lgot_result
! lsr divisor, #4
! b Loop3
- Lgot_result:
mov r0, result
mov work, ip
cmp work, #0
! Bpl Over7
neg r0, r0
! Over7:
pop { work }
RET
--- 509,532 ----
mov curbit, #1
mov result, #0
cmp divisor, #0
! bPL Lover10
neg divisor, divisor @ Loops below use unsigned.
! Lover10:
cmp dividend, #0
! bPL Lover11
neg dividend, dividend
! Lover11:
cmp dividend, divisor
! bLO Lgot_result
! THUMB_DIV_MOD_BODY 0
mov r0, result
mov work, ip
cmp work, #0
! bPL Lover12
neg r0, r0
! Lover12:
pop { work }
RET
*************** Over7:
*** 637,694 ****
mov curbit, #1
mov result, #0
cmp divisor, #0
! rsbmi divisor, divisor, #0 @ Loops below use unsigned.
! beq Ldiv0
cmp dividend, #0
! rsbmi dividend, dividend, #0
cmp dividend, divisor
! bcc Lgot_result
! Loop1:
! @ Unless the divisor is very big, shift it up in multiples of
! @ four bits, since this is the amount of unwinding in the main
! @ division loop. Continue shifting until the divisor is
! @ larger than the dividend.
! cmp divisor, #0x10000000
! cmpcc divisor, dividend
! movcc divisor, divisor, lsl #4
! movcc curbit, curbit, lsl #4
! bcc Loop1
- Lbignum:
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
- cmp divisor, #0x80000000
- cmpcc divisor, dividend
- movcc divisor, divisor, lsl #1
- movcc curbit, curbit, lsl #1
- bcc Lbignum
-
- Loop3:
- @ Test for possible subtractions, and note which bits
- @ are done in the result. On the final pass, this may subtract
- @ too much from the dividend, but the result will be ok, since the
- @ "bit" will have been shifted out at the bottom.
- cmp dividend, divisor
- subcs dividend, dividend, divisor
- orrcs result, result, curbit
- cmp dividend, divisor, lsr #1
- subcs dividend, dividend, divisor, lsr #1
- orrcs result, result, curbit, lsr #1
- cmp dividend, divisor, lsr #2
- subcs dividend, dividend, divisor, lsr #2
- orrcs result, result, curbit, lsr #2
- cmp dividend, divisor, lsr #3
- subcs dividend, dividend, divisor, lsr #3
- orrcs result, result, curbit, lsr #3
- cmp dividend, #0 @ Early termination?
- movnes curbit, curbit, lsr #4 @ No, any more bits to do?
- movne divisor, divisor, lsr #4
- bne Loop3
- Lgot_result:
mov r0, result
cmp ip, #0
! rsbmi r0, r0, #0
RET
#endif /* ARM version */
--- 536,553 ----
mov curbit, #1
mov result, #0
cmp divisor, #0
! rsbMI divisor, divisor, #0 @ Loops below use unsigned.
! bEQ Ldiv0
cmp dividend, #0
! rsbMI dividend, dividend, #0
cmp dividend, divisor
! bLO Lgot_result
! ARM_DIV_MOD_BODY 0
mov r0, result
cmp ip, #0
! rsbMI r0, r0, #0
RET
#endif /* ARM version */
*************** Lgot_result:
*** 699,940 ****
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3
- dividend .req r0
- divisor .req r1
- overdone .req r2
- curbit .req r3
- ip .req r12
- sp .req r13
- lr .req r14
- pc .req r15
-
FUNC_START modsi3
#ifdef __thumb__
mov curbit, #1
cmp divisor, #0
! beq Ldiv0
! Bpl Over1
neg divisor, divisor @ Loops below use unsigned.
! Over1:
push { work }
@ Need to save the sign of the dividend, unfortunately, we need
! @ ip later on. Must do this after saving the original value of
@ the work register, because we will pop this value off first.
push { dividend }
cmp dividend, #0
! Bpl Over2
neg dividend, dividend
! Over2:
cmp dividend, divisor
! bcc Lgot_result
! mov work, #1
! lsl work, #28
! Loop1:
! @ Unless the divisor is very big, shift it up in multiples of
! @ four bits, since this is the amount of unwinding in the main
! @ division loop. Continue shifting until the divisor is
! @ larger than the dividend.
! cmp divisor, work
! bcs Lbignum
! cmp divisor, dividend
! bcs Lbignum
! lsl divisor, #4
! lsl curbit, #4
! b Loop1
! Lbignum:
! @ Set work to 0x80000000
! lsl work, #3
! Loop2:
! @ For very big divisors, we must shift it a bit at a time, or
! @ we will be in danger of overflowing.
! cmp divisor, work
! bcs Loop3
! cmp divisor, dividend
! bcs Loop3
! lsl divisor, #1
! lsl curbit, #1
! b Loop2
- Loop3:
- @ Test for possible subtractions. On the final pass, this may
- @ subtract too much from the dividend, so keep track of which
- @ subtractions are done, we can fix them up afterwards...
- mov overdone, #0
- cmp dividend, divisor
- bcc Over3
- sub dividend, dividend, divisor
- Over3:
- lsr work, divisor, #1
- cmp dividend, work
- bcc Over4
- sub dividend, dividend, work
- mov ip, curbit
- mov work, #1
- ror curbit, work
- orr overdone, curbit
- mov curbit, ip
- Over4:
- lsr work, divisor, #2
- cmp dividend, work
- bcc Over5
- sub dividend, dividend, work
- mov ip, curbit
- mov work, #2
- ror curbit, work
- orr overdone, curbit
- mov curbit, ip
- Over5:
- lsr work, divisor, #3
- cmp dividend, work
- bcc Over6
- sub dividend, dividend, work
- mov ip, curbit
- mov work, #3
- ror curbit, work
- orr overdone, curbit
- mov curbit, ip
- Over6:
- mov ip, curbit
- cmp dividend, #0 @ Early termination?
- beq Over7
- lsr curbit, #4 @ No, any more bits to do?
- beq Over7
- lsr divisor, #4
- b Loop3
-
- Over7:
- @ Any subtractions that we should not have done will be recorded in
- @ the top three bits of "overdone". Exactly which were not needed
- @ are governed by the position of the bit, stored in ip.
- mov work, #0xe
- lsl work, #28
- and overdone, work
- beq Lgot_result
-
- @ If we terminated early, because dividend became zero, then the
- @ bit in ip will not be in the bottom nibble, and we should not
- @ perform the additions below. We must test for this though
- @ (rather relying upon the TSTs to prevent the additions) since
- @ the bit in ip could be in the top two bits which might then match
- @ with one of the smaller RORs.
- mov curbit, ip
- mov work, #0x7
- tst curbit, work
- beq Lgot_result
-
- mov curbit, ip
- mov work, #3
- ror curbit, work
- tst overdone, curbit
- beq Over8
- lsr work, divisor, #3
- add dividend, dividend, work
- Over8:
- mov curbit, ip
- mov work, #2
- ror curbit, work
- tst overdone, curbit
- beq Over9
- lsr work, divisor, #2
- add dividend, dividend, work
- Over9:
- mov curbit, ip
- mov work, #1
- ror curbit, work
- tst overdone, curbit
- beq Lgot_result
- lsr work, divisor, #1
- add dividend, dividend, work
- Lgot_result:
pop { work }
cmp work, #0
! bpl Over10
neg dividend, dividend
! Over10:
pop { work }
RET
#else /* ARM version. */
- mov curbit, #1
cmp divisor, #0
! rsbmi divisor, divisor, #0 @ Loops below use unsigned.
! beq Ldiv0
@ Need to save the sign of the dividend, unfortunately, we need
@ ip later on; this is faster than pushing lr and using that.
str dividend, [sp, #-4]!
! cmp dividend, #0
! rsbmi dividend, dividend, #0
! cmp dividend, divisor
! bcc Lgot_result
!
! Loop1:
! @ Unless the divisor is very big, shift it up in multiples of
! @ four bits, since this is the amount of unwinding in the main
! @ division loop. Continue shifting until the divisor is
! @ larger than the dividend.
! cmp divisor, #0x10000000
! cmpcc divisor, dividend
! movcc divisor, divisor, lsl #4
! movcc curbit, curbit, lsl #4
! bcc Loop1
!
! Lbignum:
! @ For very big divisors, we must shift it a bit at a time, or
! @ we will be in danger of overflowing.
! cmp divisor, #0x80000000
! cmpcc divisor, dividend
! movcc divisor, divisor, lsl #1
! movcc curbit, curbit, lsl #1
! bcc Lbignum
! Loop3:
! @ Test for possible subtractions. On the final pass, this may
! @ subtract too much from the dividend, so keep track of which
! @ subtractions are done, we can fix them up afterwards...
! mov overdone, #0
! cmp dividend, divisor
! subcs dividend, dividend, divisor
! cmp dividend, divisor, lsr #1
! subcs dividend, dividend, divisor, lsr #1
! orrcs overdone, overdone, curbit, ror #1
! cmp dividend, divisor, lsr #2
! subcs dividend, dividend, divisor, lsr #2
! orrcs overdone, overdone, curbit, ror #2
! cmp dividend, divisor, lsr #3
! subcs dividend, dividend, divisor, lsr #3
! orrcs overdone, overdone, curbit, ror #3
! mov ip, curbit
! cmp dividend, #0 @ Early termination?
! movnes curbit, curbit, lsr #4 @ No, any more bits to do?
! movne divisor, divisor, lsr #4
! bne Loop3
- @ Any subtractions that we should not have done will be recorded in
- @ the top three bits of "overdone". Exactly which were not needed
- @ are governed by the position of the bit, stored in ip.
- ands overdone, overdone, #0xe0000000
- @ If we terminated early, because dividend became zero, then the
- @ bit in ip will not be in the bottom nibble, and we should not
- @ perform the additions below. We must test for this though
- @ (rather relying upon the TSTs to prevent the additions) since
- @ the bit in ip could be in the top two bits which might then match
- @ with one of the smaller RORs.
- tstNE ip, #0x7
- beq Lgot_result
- tst overdone, ip, ror #3
- addne dividend, dividend, divisor, lsr #3
- tst overdone, ip, ror #2
- addne dividend, dividend, divisor, lsr #2
- tst overdone, ip, ror #1
- addne dividend, dividend, divisor, lsr #1
- Lgot_result:
ldr ip, [sp], #4
cmp ip, #0
! rsbmi dividend, dividend, #0
RET
#endif /* ARM version */
--- 558,614 ----
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3
FUNC_START modsi3
#ifdef __thumb__
mov curbit, #1
cmp divisor, #0
! bEQ Ldiv0
! bPL Lover10
neg divisor, divisor @ Loops below use unsigned.
! Lover10:
push { work }
@ Need to save the sign of the dividend, unfortunately, we need
! @ work later on. Must do this after saving the original value of
@ the work register, because we will pop this value off first.
push { dividend }
cmp dividend, #0
! bPL Lover11
neg dividend, dividend
! Lover11:
cmp dividend, divisor
! bLO Lgot_result
! THUMB_DIV_MOD_BODY 1
pop { work }
cmp work, #0
! bPL Lover12
neg dividend, dividend
! Lover12:
pop { work }
RET
#else /* ARM version. */
cmp divisor, #0
! rsbMI divisor, divisor, #0 @ Loops below use unsigned.
! bEQ Ldiv0
@ Need to save the sign of the dividend, unfortunately, we need
@ ip later on; this is faster than pushing lr and using that.
str dividend, [sp, #-4]!
! cmp dividend, #0 @ Test dividend against zero
! rsbMI dividend, dividend, #0 @ If negative make positive
! cmp dividend, divisor @ else if zero return zero
! bLO Lgot_result @ if smaller return dividend
! mov curbit, #1
! ARM_DIV_MOD_BODY 1
ldr ip, [sp], #4
cmp ip, #0
! rsbMI dividend, dividend, #0
RET
#endif /* ARM version */
*************** _arm_return:
*** 1105,1108 ****
SIZE (_interwork_call_via_lr)
#endif /* L_interwork_call_via_rX */
-
--- 779,781 ----