This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
ARM: lib1funcs.asm tidy up

To: gcc-patches at gcc dot gnu dot org
Subject: ARM: lib1funcs.asm tidy up
From: Nick Clifton <nickc at redhat dot com>
Date: Tue, 22 Aug 2000 12:49:09 -0700
Hi Guys,

  Well as promised here is the patch I am about to apply to tidy up
  the code in the ARM port's lib1funcs.asm file.  This patch replaces
  duplicated code sequences with assembler macros, and then defines
  the code once, in the definition of the macro.

  I have tested the patched file with divmod-1.c and arith-rand.c from
  the GCC testsuite (thanks Jeff!) and there are no failures.

Cheers
	Nick


2000-08-22  Nick Clifton  <nickc@redhat.com>

	* config/arm/lib1funcs.asm (ARM_DIV_MOD_BODY): New macro.
	Common code for ARM divide and modulus functions.
	(THUMB_DIV_MOD_BODY): New macro. Thumb equivalent of
	ARM_DIV_MOD_BODY.
	(FUNC_END): New macro: Common code at the end of the division
	and modulo functions.
	(THUMB_FUNC_START): New macro:  Common code at the start of
	Thumb functions.
	(__divsi3, __udivsi3, __modsi3, __umodsi3): Use new macros.
	
Index: config/arm/lib1funcs.asm
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/arm/lib1funcs.asm,v
retrieving revision 1.13
diff -p -w -r1.13 lib1funcs.asm
*** lib1funcs.asm	2000/08/22 19:37:02	1.13
--- lib1funcs.asm	2000/08/22 19:39:03
*************** along with this program; see the file CO
*** 27,32 ****
--- 27,35 ----
  the Free Software Foundation, 59 Temple Place - Suite 330,
  Boston, MA 02111-1307, USA.  */
  /* ------------------------------------------------------------------------ */
+ 
+ /* We need to know what prefix to add to function names.  */
+ 
  #ifndef __USER_LABEL_PREFIX__
  #error  __USER_LABEL_PREFIX__ not defined
  #endif
*************** Boston, MA 02111-1307, USA.  */
*** 55,60 ****
--- 58,64 ----
  #endif
  
  /* Function end macros.  Variants for 26 bit APCS and interworking.  */
+ 
  #ifdef __APCS_26__
  # define RET		movs	pc, lr
  # define RETc(x)	mov##x##s	pc, lr
*************** Ldiv0:
*** 71,76 ****
--- 75,81 ----
  #  define RET		bx	lr
  #  define RETc(x)	bx##x	lr
  .macro THUMB_LDIV0
+ Ldiv0:
  	push	{ lr }
  	bl	SYM (__div0)
  	mov	r0, #0			@ About as wrong as it could be.
*************** Ldiv0:
*** 78,83 ****
--- 83,89 ----
  	bx	r1
  .endm
  .macro ARM_LDIV0
+ Ldiv0:
  	str	lr, [sp, #-4]!
  	bl	SYM (__div0) __PLT__
  	mov	r0, #0			@ About as wrong as it could be.
*************** Ldiv0:
*** 88,99 ****
--- 94,107 ----
  #  define RET		mov	pc, lr
  #  define RETc(x)	mov##x	pc, lr
  .macro THUMB_LDIV0
+ Ldiv0:
  	push	{ lr }
  	bl	SYM (__div0)
  	mov	r0, #0			@ About as wrong as it could be.
  	pop	{ pc }
  .endm
  .macro ARM_LDIV0
+ Ldiv0:
  	str	lr, [sp, #-4]!
  	bl	SYM (__div0) __PLT__
  	mov	r0, #0			@ About as wrong as it could be.
*************** Ldiv0:
*** 103,109 ****
--- 111,136 ----
  # define RETCOND
  #endif
  
+ .macro FUNC_END name
+ Ldiv0:
  #ifdef __thumb__
+ 	THUMB_LDIV0
+ #else
+ 	ARM_LDIV0
+ #endif
+ 	SIZE (__\name)	
+ .endm
+ 
+ .macro THUMB_FUNC_START name
+ 	.globl	SYM (\name)
+ 	TYPE	(\name)
+ 	.thumb_func
+ SYM (\name):
+ .endm
+ 
+ /* Function start macros.  Variants for ARM and Thumb.  */
+ 
+ #ifdef __thumb__
  #define THUMB_FUNC .thumb_func
  #define THUMB_CODE .force_thumb
  #else
*************** Ldiv0:
*** 111,117 ****
  #define THUMB_CODE
  #endif
  	
- 	
  .macro FUNC_START name
  	.text
  	.globl SYM (__\name)
--- 138,143 ----
*************** Ldiv0:
*** 122,328 ****
  SYM (__\name):
  .endm
  
! .macro FUNC_END name
! Ldiv0:
! #ifdef __thumb__
! 	THUMB_LDIV0
! #else
! 	ARM_LDIV0
! #endif
! 	SIZE (__\name)	
! .endm
  
- .macro THUMB_FUNC_START name
- 	.globl	SYM (\name)
- 	TYPE	(\name)
- 	.thumb_func
- SYM (\name):
- .endm
- 		
- /* Used for Thumb code.  */	
  work		.req	r4	@ XXXX is this safe ?
- 
- /* ------------------------------------------------------------------------ */
- #ifdef L_udivsi3
- 
  dividend	.req	r0
  divisor		.req	r1
  result		.req	r2
  curbit		.req	r3
  ip		.req	r12
  sp		.req	r13
  lr		.req	r14
  pc		.req	r15
- 	
- 	FUNC_START udivsi3
- 
- #ifdef __thumb__
- 
- 	cmp	divisor, #0
- 	beq	Ldiv0
- 	mov	curbit, #1
- 	mov	result, #0
- 	
- 	push	{ work }
- 	cmp	dividend, divisor
- 	bcc	Lgot_result
- 
- 	@ Load the constant 0x10000000 into our work register
- 	mov	work, #1
- 	lsl	work, #28
- Loop1:
- 	@ Unless the divisor is very big, shift it up in multiples of
- 	@ four bits, since this is the amount of unwinding in the main
- 	@ division loop.  Continue shifting until the divisor is 
- 	@ larger than the dividend.
- 	cmp	divisor, work
- 	bcs     Lbignum
- 	cmp	divisor, dividend
- 	bcs     Lbignum
- 	lsl	divisor, #4
- 	lsl	curbit,  #4
- 	b	Loop1
  
! Lbignum:
! 	@ Set work to 0x80000000
! 	lsl	work, #3
! Loop2:		
! 	@ For very big divisors, we must shift it a bit at a time, or
! 	@ we will be in danger of overflowing.
! 	cmp	divisor, work
! 	bcs	Loop3
! 	cmp	divisor, dividend
! 	bcs	Loop3
! 	lsl	divisor, #1
! 	lsl	curbit,  #1
! 	b	Loop2
! 
! Loop3:
! 	@ Test for possible subtractions, and note which bits
! 	@ are done in the result.  On the final pass, this may subtract
! 	@ too much from the dividend, but the result will be ok, since the
! 	@ "bit" will have been shifted out at the bottom.
! 	cmp	dividend, divisor
! 	bcc     Over1
! 	sub	dividend, dividend, divisor
! 	orr	result, result, curbit
! Over1:	
! 	lsr	work, divisor, #1
! 	cmp	dividend, work
! 	bcc	Over2
! 	sub	dividend, dividend, work
! 	lsr	work, curbit, #1
! 	orr	result, work
! Over2:	
! 	lsr	work, divisor, #2
! 	cmp	dividend, work
! 	bcc	Over3
! 	sub	dividend, dividend, work
! 	lsr	work, curbit, #2
! 	orr	result, work
! Over3:	
! 	lsr	work, divisor, #3
! 	cmp	dividend, work
! 	bcc	Over4
! 	sub	dividend, dividend, work
! 	lsr	work, curbit, #3
! 	orr	result, work
! Over4:	
! 	cmp	dividend, #0			@ Early termination?
! 	beq	Lgot_result
! 	lsr	curbit,  #4			@ No, any more bits to do?
! 	beq	Lgot_result
! 	lsr	divisor, #4
! 	b	Loop3
! Lgot_result:
! 	mov	r0, result
! 	pop	{ work }
! 	RET
! 
! #else /* ARM version.  */
! 	
! 	cmp	divisor, #0
! 	beq	Ldiv0
! 	mov	curbit, #1
! 	mov	result, #0
! 	cmp	dividend, divisor
! 	bcc	Lgot_result
  Loop1:
  	@ Unless the divisor is very big, shift it up in multiples of
  	@ four bits, since this is the amount of unwinding in the main
  	@ division loop.  Continue shifting until the divisor is 
  	@ larger than the dividend.
  	cmp	divisor, #0x10000000
! 	cmpcc	divisor, dividend
! 	movcc	divisor, divisor, lsl #4
! 	movcc	curbit, curbit, lsl #4
! 	bcc	Loop1
  
  Lbignum:
  	@ For very big divisors, we must shift it a bit at a time, or
  	@ we will be in danger of overflowing.
  	cmp	divisor, #0x80000000
! 	cmpcc	divisor, dividend
! 	movcc	divisor, divisor, lsl #1
! 	movcc	curbit, curbit, lsl #1
! 	bcc	Lbignum
  
  Loop3:
! 	@ Test for possible subtractions, and note which bits
! 	@ are done in the result.  On the final pass, this may subtract
! 	@ too much from the dividend, but the result will be ok, since the
! 	@ "bit" will have been shifted out at the bottom.
  	cmp	dividend, divisor
! 	subcs	dividend, dividend, divisor
! 	orrcs	result, result, curbit
  	cmp	dividend, divisor, lsr #1
! 	subcs	dividend, dividend, divisor, lsr #1
! 	orrcs	result, result, curbit, lsr #1
  	cmp	dividend, divisor, lsr #2
! 	subcs	dividend, dividend, divisor, lsr #2
! 	orrcs	result, result, curbit, lsr #2
  	cmp	dividend, divisor, lsr #3
! 	subcs	dividend, dividend, divisor, lsr #3
! 	orrcs	result, result, curbit, lsr #3
! 	cmp	dividend, #0			@ Early termination?
! 	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
! 	movne	divisor, divisor, lsr #4
! 	bne	Loop3
! Lgot_result:
! 	mov	r0, result
! 	RET	
  
! #endif /* ARM version */
  
! 	FUNC_END udivsi3
  
! #endif /* L_udivsi3 */
  /* ------------------------------------------------------------------------ */
! #ifdef L_umodsi3
! 
! dividend	.req	r0
! divisor		.req	r1
! overdone	.req	r2
! curbit		.req	r3
! ip		.req	r12
! sp		.req	r13
! lr		.req	r14
! pc		.req	r15
! 	
! 	FUNC_START umodsi3
! 
! #ifdef __thumb__
! 
! 	cmp	divisor, #0
! 	beq	Ldiv0
! 	mov	curbit, #1
! 	cmp	dividend, divisor
! 	bcs	Over1
! 	RET	
! 
! Over1:	
! 	@ Load the constant 0x10000000 into our work register
! 	push	{ work }
  	mov	work, #1
  	lsl	work, #28
  Loop1:
--- 148,260 ----
  SYM (__\name):
  .endm
  		
! /* Register aliases.  */
  
  work		.req	r4	@ XXXX is this safe ?
  dividend	.req	r0
  divisor		.req	r1
+ overdone	.req	r2
  result		.req	r2
  curbit		.req	r3
  ip		.req	r12
  sp		.req	r13
  lr		.req	r14
  pc		.req	r15
  
! /* ------------------------------------------------------------------------ */
! /*		Bodies of the divsion and modulo routines.		    */
! /* ------------------------------------------------------------------------ */	
! .macro ARM_DIV_MOD_BODY modulo
  Loop1:
  	@ Unless the divisor is very big, shift it up in multiples of
  	@ four bits, since this is the amount of unwinding in the main
  	@ division loop.  Continue shifting until the divisor is 
  	@ larger than the dividend.
  	cmp	divisor, #0x10000000
! 	cmpLO	divisor, dividend
! 	movLO	divisor, divisor, lsl #4
! 	movLO	curbit,  curbit,  lsl #4
! 	bLO	Loop1
  
  Lbignum:
  	@ For very big divisors, we must shift it a bit at a time, or
  	@ we will be in danger of overflowing.
  	cmp	divisor, #0x80000000
! 	cmpLO	divisor, dividend
! 	movLO	divisor, divisor, lsl #1
! 	movLO	curbit,  curbit,  lsl #1
! 	bLO	Lbignum
  
  Loop3:
! 	@ Test for possible subtractions.  On the final pass, this may 
! 	@ subtract too much from the dividend ...
! 	
!   .if \modulo
! 	@ ... so keep track of which subtractions are done in OVERDONE.
! 	@ We can fix them up afterwards.
! 	mov	overdone, #0
  	cmp	dividend, divisor
! 	subHS	dividend, dividend, divisor
  	cmp	dividend, divisor,  lsr #1
! 	subHS	dividend, dividend, divisor, lsr #1
! 	orrHS	overdone, overdone, curbit,  ror #1
  	cmp	dividend, divisor,  lsr #2
! 	subHS	dividend, dividend, divisor, lsr #2
! 	orrHS	overdone, overdone, curbit,  ror #2
  	cmp	dividend, divisor,  lsr #3
! 	subHS	dividend, dividend, divisor, lsr #3
! 	orrHS	overdone, overdone, curbit,  ror #3
! 	mov	ip,       curbit
!   .else
! 	@ ... so keep track of which subtractions are done in RESULT.
! 	@ The result will be ok, since the "bit" will have been 
! 	@ shifted out at the bottom.
! 	cmp	dividend, divisor
! 	subHS	dividend, dividend, divisor
! 	orrHS	result,   result,   curbit
! 	cmp	dividend, divisor,  lsr #1
! 	subHS	dividend, dividend, divisor, lsr #1
! 	orrHS	result,   result,   curbit,  lsr #1
! 	cmp	dividend, divisor,  lsr #2
! 	subHS	dividend, dividend, divisor, lsr #2
! 	orrHS	result,   result,   curbit,  lsr #2
! 	cmp	dividend, divisor,  lsr #3
! 	subHS	dividend, dividend, divisor, lsr #3
! 	orrHS	result,   result,   curbit,  lsr #3
!   .endif
  
! 	cmp	dividend, #0			@ Early termination?
! 	movNEs	curbit,   curbit,  lsr #4	@ No, any more bits to do?
! 	movNE	divisor,  divisor, lsr #4
! 	bNE	Loop3
  
!   .if \modulo
! Lfixup_dividend:	
! 	@ Any subtractions that we should not have done will be recorded in
! 	@ the top three bits of OVERDONE.  Exactly which were not needed
! 	@ are governed by the position of the bit, stored in IP.
! 	ands	overdone, overdone, #0xe0000000
! 	@ If we terminated early, because dividend became zero, then the 
! 	@ bit in ip will not be in the bottom nibble, and we should not
! 	@ perform the additions below.  We must test for this though
! 	@ (rather relying upon the TSTs to prevent the additions) since
! 	@ the bit in ip could be in the top two bits which might then match
! 	@ with one of the smaller RORs.
! 	tstNE	ip, #0x7
! 	bEQ	Lgot_result
! 	tst	overdone, ip, ror #3
! 	addNE	dividend, dividend, divisor, lsr #3
! 	tst	overdone, ip, ror #2
! 	addNE	dividend, dividend, divisor, lsr #2
! 	tst	overdone, ip, ror #1
! 	addNE	dividend, dividend, divisor, lsr #1
!   .endif
  
! Lgot_result:
! .endm
  /* ------------------------------------------------------------------------ */
! .macro THUMB_DIV_MOD_BODY modulo
! 	@ Load the constant 0x10000000 into our work register.
  	mov	work, #1
  	lsl	work, #28
  Loop1:
*************** Loop1:
*** 331,339 ****
  	@ division loop.  Continue shifting until the divisor is 
  	@ larger than the dividend.
  	cmp	divisor, work
! 	bcs	Lbignum
  	cmp	divisor, dividend
! 	bcs	Lbignum
  	lsl	divisor, #4
  	lsl	curbit, #4
  	b	Loop1
--- 263,271 ----
  	@ division loop.  Continue shifting until the divisor is 
  	@ larger than the dividend.
  	cmp	divisor, work
! 	bHS	Lbignum
  	cmp	divisor, dividend
! 	bHS	Lbignum
  	lsl	divisor, #4
  	lsl	curbit,  #4
  	b	Loop1
*************** Loop2:
*** 344,411 ****
  	@ For very big divisors, we must shift it a bit at a time, or
  	@ we will be in danger of overflowing.
  	cmp	divisor, work
! 	bcs	Loop3
  	cmp	divisor, dividend
! 	bcs	Loop3
  	lsl	divisor, #1
  	lsl	curbit, #1
  	b	Loop2
  Loop3:
! 	@ Test for possible subtractions.  On the final pass, this may 
! 	@ subtract too much from the dividend, so keep track of which
! 	@ subtractions are done, we can fix them up afterwards...
  	mov	overdone, #0
  	cmp	dividend, divisor
! 	bcc	Over2
  	sub	dividend, dividend, divisor
! Over2:
  	lsr	work, divisor, #1
  	cmp	dividend, work
! 	bcc	Over3
  	sub	dividend, dividend, work
  	mov	ip, curbit
  	mov	work, #1
  	ror	curbit, work
  	orr	overdone, curbit
  	mov	curbit, ip
! Over3:
  	lsr	work, divisor, #2
  	cmp	dividend, work
! 	bcc	Over4
  	sub	dividend, dividend, work
  	mov	ip, curbit
  	mov	work, #2
  	ror	curbit, work
  	orr	overdone, curbit
  	mov	curbit, ip
! Over4:
  	lsr	work, divisor, #3
  	cmp	dividend, work
! 	bcc	Over5
  	sub	dividend, dividend, work
  	mov	ip, curbit
  	mov	work, #3
  	ror	curbit, work
  	orr	overdone, curbit
  	mov	curbit, ip
! Over5:
  	mov	ip, curbit
  	cmp	dividend, #0			@ Early termination?
! 	beq	Over6
  	lsr	curbit, #4			@ No, any more bits to do?
! 	beq	Over6
  	lsr	divisor, #4
  	b	Loop3
! Over6:	
  	@ Any subtractions that we should not have done will be recorded in
  	@ the top three bits of "overdone".  Exactly which were not needed
  	@ are governed by the position of the bit, stored in ip.
  	mov	work, #0xe
  	lsl	work, #28	
  	and	overdone, work
! 	bne	Over7
! 	pop	{ work }
! 	RET					@ No fixups needed
  	
  	@ If we terminated early, because dividend became zero, then the 
  	@ bit in ip will not be in the bottom nibble, and we should not
--- 276,376 ----
  	@ For very big divisors, we must shift it a bit at a time, or
  	@ we will be in danger of overflowing.
  	cmp	divisor, work
! 	bHS	Loop3
  	cmp	divisor, dividend
! 	bHS	Loop3
  	lsl	divisor, #1
  	lsl	curbit,  #1
  	b	Loop2
  Loop3:
! 	@ Test for possible subtractions ...
!   .if \modulo
! 	@ ... On the final pass, this may subtract too much from the dividend, 
! 	@ so keep track of which subtractions are done, we can fix them up 
! 	@ afterwards.
  	mov	overdone, #0
  	cmp	dividend, divisor
! 	bLO	Lover1
  	sub	dividend, dividend, divisor
! Lover1:
  	lsr	work, divisor, #1
  	cmp	dividend, work
! 	bLO	Lover2
  	sub	dividend, dividend, work
  	mov	ip, curbit
  	mov	work, #1
  	ror	curbit, work
  	orr	overdone, curbit
  	mov	curbit, ip
! Lover2:
  	lsr	work, divisor, #2
  	cmp	dividend, work
! 	bLO	Lover3
  	sub	dividend, dividend, work
  	mov	ip, curbit
  	mov	work, #2
  	ror	curbit, work
  	orr	overdone, curbit
  	mov	curbit, ip
! Lover3:
  	lsr	work, divisor, #3
  	cmp	dividend, work
! 	bLO	Lover4
  	sub	dividend, dividend, work
  	mov	ip, curbit
  	mov	work, #3
  	ror	curbit, work
  	orr	overdone, curbit
  	mov	curbit, ip
! Lover4:
  	mov	ip, curbit
+   .else
+ 	@ ... and note which bits are done in the result.  On the final pass,
+ 	@ this may subtract too much from the dividend, but the result will be ok,
+ 	@ since the "bit" will have been shifted out at the bottom.
+ 	cmp	dividend, divisor
+ 	bLO	Lover1
+ 	sub	dividend, dividend, divisor
+ 	orr	result, result, curbit
+ Lover1:
+ 	lsr	work, divisor, #1
+ 	cmp	dividend, work
+ 	bLO	Lover2
+ 	sub	dividend, dividend, work
+ 	lsr	work, curbit, #1
+ 	orr	result, work
+ Lover2:
+ 	lsr	work, divisor, #2
+ 	cmp	dividend, work
+ 	bLO	Lover3
+ 	sub	dividend, dividend, work
+ 	lsr	work, curbit, #2
+ 	orr	result, work
+ Lover3:
+ 	lsr	work, divisor, #3
+ 	cmp	dividend, work
+ 	bLO	Lover4
+ 	sub	dividend, dividend, work
+ 	lsr	work, curbit, #3
+ 	orr	result, work
+ Lover4:
+   .endif
+ 	
  	cmp	dividend, #0			@ Early termination?
! 	bEQ	Lover5
  	lsr	curbit,  #4			@ No, any more bits to do?
! 	bEQ	Lover5
  	lsr	divisor, #4
  	b	Loop3
! Lover5:
!   .if \modulo
  	@ Any subtractions that we should not have done will be recorded in
  	@ the top three bits of "overdone".  Exactly which were not needed
  	@ are governed by the position of the bit, stored in ip.
  	mov	work, #0xe
  	lsl	work, #28
  	and	overdone, work
! 	bEQ	Lgot_result
  	
  	@ If we terminated early, because dividend became zero, then the 
  	@ bit in ip will not be in the bottom nibble, and we should not
*************** Over6:	
*** 416,520 ****
  	mov	curbit, ip
  	mov	work, #0x7
  	tst	curbit, work
! 	beq	Over10
  	
- Over7:
  	mov	curbit, ip
  	mov	work, #3
  	ror	curbit, work
  	tst	overdone, curbit
! 	beq	Over8
  	lsr	work, divisor, #3
! 	add	dividend, dividend, work
! Over8:
  	mov	curbit, ip
  	mov	work, #2
  	ror	curbit, work
  	tst	overdone, curbit
! 	beq	Over9
  	lsr	work, divisor, #2
! 	add	dividend, dividend, work
! Over9:
  	mov	curbit, ip
  	mov	work, #1
  	ror	curbit, work
  	tst	overdone, curbit
! 	beq	Over10
  	lsr	work, divisor, #1
! 	add	dividend, dividend, work
! Over10:
  	pop	{ work }
  	RET
  	
  #else  /* ARM version.  */
  	
  	cmp	divisor, #0
! 	beq	Ldiv0
  	mov	curbit, #1
  	cmp	dividend, divisor
! 	RETc(cc)
! Loop1:
! 	@ Unless the divisor is very big, shift it up in multiples of
! 	@ four bits, since this is the amount of unwinding in the main
! 	@ division loop.  Continue shifting until the divisor is 
! 	@ larger than the dividend.
! 	cmp	divisor, #0x10000000
! 	cmpcc	divisor, dividend
! 	movcc	divisor, divisor, lsl #4
! 	movcc	curbit, curbit, lsl #4
! 	bcc	Loop1
  
! Lbignum:
! 	@ For very big divisors, we must shift it a bit at a time, or
! 	@ we will be in danger of overflowing.
! 	cmp	divisor, #0x80000000
! 	cmpcc	divisor, dividend
! 	movcc	divisor, divisor, lsl #1
! 	movcc	curbit, curbit, lsl #1
! 	bcc	Lbignum
  
! Loop3:
! 	@ Test for possible subtractions.  On the final pass, this may 
! 	@ subtract too much from the dividend, so keep track of which
! 	@ subtractions are done, we can fix them up afterwards...
! 	mov	overdone, #0
  	cmp	dividend, divisor
! 	subcs	dividend, dividend, divisor
! 	cmp	dividend, divisor, lsr #1
! 	subcs	dividend, dividend, divisor, lsr #1
! 	orrcs	overdone, overdone, curbit, ror #1
! 	cmp	dividend, divisor, lsr #2
! 	subcs	dividend, dividend, divisor, lsr #2
! 	orrcs	overdone, overdone, curbit, ror #2
! 	cmp	dividend, divisor, lsr #3
! 	subcs	dividend, dividend, divisor, lsr #3
! 	orrcs	overdone, overdone, curbit, ror #3
! 	mov	ip, curbit
! 	cmp	dividend, #0			@ Early termination?
! 	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
! 	movne	divisor, divisor, lsr #4
! 	bne	Loop3
  
! 	@ Any subtractions that we should not have done will be recorded in
! 	@ the top three bits of "overdone".  Exactly which were not needed
! 	@ are governed by the position of the bit, stored in ip.
! 	ands	overdone, overdone, #0xe0000000
! 	@ If we terminated early, because dividend became zero, then the 
! 	@ bit in ip will not be in the bottom nibble, and we should not
! 	@ perform the additions below.  We must test for this though
! 	@ (rather relying upon the TSTs to prevent the additions) since
! 	@ the bit in ip could be in the top two bits which might then match
! 	@ with one of the smaller RORs.
! 	tstNE	ip, #0x7
! 	RETc(eq)				@ No fixups needed
! 	tst	overdone, ip, ror #3
! 	addne	dividend, dividend, divisor, lsr #3
! 	tst	overdone, ip, ror #2
! 	addne	dividend, dividend, divisor, lsr #2
! 	tst	overdone, ip, ror #1
! 	addne	dividend, dividend, divisor, lsr #1
  	RET	
  
  #endif /* ARM version.  */
  	
  	FUNC_END umodsi3
--- 381,493 ----
  	mov	curbit, ip
  	mov	work, #0x7
  	tst	curbit, work
! 	bEQ	Lgot_result
  	
  	mov	curbit, ip
  	mov	work, #3
  	ror	curbit, work
  	tst	overdone, curbit
! 	bEQ	Lover6
  	lsr	work, divisor, #3
! 	add	dividend, work
! Lover6:
  	mov	curbit, ip
  	mov	work, #2
  	ror	curbit, work
  	tst	overdone, curbit
! 	bEQ	Lover7
  	lsr	work, divisor, #2
! 	add	dividend, work
! Lover7:
  	mov	curbit, ip
  	mov	work, #1
  	ror	curbit, work
  	tst	overdone, curbit
! 	bEQ	Lgot_result
  	lsr	work, divisor, #1
! 	add	dividend, work
!   .endif
! Lgot_result:
! .endm	
! /* ------------------------------------------------------------------------ */
! /*		Start of the Real Functions				    */
! /* ------------------------------------------------------------------------ */
! #ifdef L_udivsi3
! 
! 	FUNC_START udivsi3
! 
! #ifdef __thumb__
! 
! 	cmp	divisor, #0
! 	bEQ	Ldiv0
! 	mov	curbit, #1
! 	mov	result, #0
! 	
! 	push	{ work }
! 	cmp	dividend, divisor
! 	bLO	Lgot_result
! 
! 	THUMB_DIV_MOD_BODY 0
! 	
! 	mov	r0, result
  	pop	{ work }
  	RET
  
  #else /* ARM version.  */
  	
  	cmp	divisor, #0
! 	bEQ	Ldiv0
  	mov	curbit, #1
+ 	mov	result, #0
  	cmp	dividend, divisor
! 	bLO	Lgot_result
  	
! 	ARM_DIV_MOD_BODY 0
  	
! 	mov	r0, result
! 	RET	
! 
! #endif /* ARM version */
! 
! 	FUNC_END udivsi3
! 
! #endif /* L_udivsi3 */
! /* ------------------------------------------------------------------------ */
! #ifdef L_umodsi3
! 
! 	FUNC_START umodsi3
! 
! #ifdef __thumb__
! 
! 	cmp	divisor, #0
! 	bEQ	Ldiv0
! 	mov	curbit, #1
  	cmp	dividend, divisor
! 	bHS	Lover10
! 	RET	
  
! Lover10:
! 	push	{ work }
! 
! 	THUMB_DIV_MOD_BODY 1
! 	
! 	pop	{ work }
  	RET
  	
+ #else  /* ARM version.  */
+ 	
+ 	cmp	divisor, #0
+ 	bEQ	Ldiv0
+ 	cmp     divisor, #1
+ 	cmpNE	dividend, divisor
+ 	movEQ   dividend, #0
+ 	RETc(LO)
+ 	mov	curbit, #1
+ 
+ 	ARM_DIV_MOD_BODY 1
+ 	
+ 	RET	
+ 
  #endif /* ARM version.  */
  	
  	FUNC_END umodsi3
*************** Loop3:
*** 523,542 ****
  /* ------------------------------------------------------------------------ */
  #ifdef L_divsi3
  
- dividend	.req	r0
- divisor		.req	r1
- result		.req	r2
- curbit		.req	r3
- ip		.req	r12
- sp		.req	r13
- lr		.req	r14
- pc		.req	r15
- 
  	FUNC_START divsi3	
  
  #ifdef __thumb__
  	cmp	divisor, #0
! 	beq	Ldiv0
  	
  	push	{ work }
  	mov	work, dividend
--- 496,506 ----
  /* ------------------------------------------------------------------------ */
  #ifdef L_divsi3
  
  	FUNC_START divsi3	
  
  #ifdef __thumb__
  	cmp	divisor, #0
! 	bEQ	Ldiv0
  	
  	push	{ work }
  	mov	work, dividend
*************** pc		.req	r15
*** 545,633 ****
  	mov	curbit, #1
  	mov	result, #0
  	cmp	divisor, #0
! 	bpl	Over1
  	neg	divisor, divisor	@ Loops below use unsigned.
! Over1:	
  	cmp	dividend, #0
! 	bpl	Over2
  	neg	dividend, dividend
! Over2:	
  	cmp	dividend, divisor
! 	bcc	Lgot_result
! 
! 	mov	work, #1
! 	lsl	work, #28
! Loop1:
! 	@ Unless the divisor is very big, shift it up in multiples of
! 	@ four bits, since this is the amount of unwinding in the main
! 	@ division loop.  Continue shifting until the divisor is 
! 	@ larger than the dividend.
! 	cmp	divisor, work
! 	Bcs	Lbignum
! 	cmp	divisor, dividend
! 	Bcs	Lbignum
! 	lsl	divisor, #4
! 	lsl	curbit, #4
! 	b	Loop1
! 
! Lbignum:
! 	@ For very big divisors, we must shift it a bit at a time, or
! 	@ we will be in danger of overflowing.
! 	lsl	work, #3
! Loop2:		
! 	cmp	divisor, work
! 	Bcs	Loop3
! 	cmp	divisor, dividend
! 	Bcs	Loop3
! 	lsl	divisor, #1
! 	lsl	curbit, #1
! 	b	Loop2
  
! Loop3:
! 	@ Test for possible subtractions, and note which bits
! 	@ are done in the result.  On the final pass, this may subtract
! 	@ too much from the dividend, but the result will be ok, since the
! 	@ "bit" will have been shifted out at the bottom.
! 	cmp	dividend, divisor
! 	Bcc	Over3
! 	sub	dividend, dividend, divisor
! 	orr	result, result, curbit
! Over3:
! 	lsr	work, divisor, #1
! 	cmp	dividend, work
! 	Bcc	Over4
! 	sub	dividend, dividend, work
! 	lsr	work, curbit, #1
! 	orr	result, work
! Over4:	
! 	lsr	work, divisor, #2
! 	cmp	dividend, work
! 	Bcc	Over5
! 	sub	dividend, dividend, work
! 	lsr	work, curbit, #2
! 	orr	result, result, work
! Over5:	
! 	lsr	work, divisor, #3
! 	cmp	dividend, work
! 	Bcc	Over6
! 	sub	dividend, dividend, work
! 	lsr	work, curbit, #3
! 	orr	result, result, work
! Over6:	
! 	cmp	dividend, #0			@ Early termination?
! 	Beq	Lgot_result
! 	lsr	curbit, #4			@ No, any more bits to do?
! 	Beq	Lgot_result
! 	lsr	divisor, #4
! 	b	Loop3
  	
- Lgot_result:
  	mov	r0, result
  	mov	work, ip
  	cmp	work, #0
! 	Bpl	Over7
  	neg	r0, r0
! Over7:
  	pop	{ work }
  	RET	
  
--- 509,532 ----
  	mov	curbit, #1
  	mov	result, #0
  	cmp	divisor, #0
! 	bPL	Lover10
  	neg	divisor, divisor	@ Loops below use unsigned.
! Lover10:
  	cmp	dividend, #0
! 	bPL	Lover11
  	neg	dividend, dividend
! Lover11:
  	cmp	dividend, divisor
! 	bLO	Lgot_result
  
! 	THUMB_DIV_MOD_BODY 0
  	
  	mov	r0, result
  	mov	work, ip
  	cmp	work, #0
! 	bPL	Lover12
  	neg	r0, r0
! Lover12:
  	pop	{ work }
  	RET
  
*************** Over7:
*** 637,694 ****
  	mov	curbit, #1
  	mov	result, #0
  	cmp	divisor, #0
! 	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
! 	beq	Ldiv0
  	cmp	dividend, #0
! 	rsbmi	dividend, dividend, #0
  	cmp	dividend, divisor
! 	bcc	Lgot_result
  
! Loop1:
! 	@ Unless the divisor is very big, shift it up in multiples of
! 	@ four bits, since this is the amount of unwinding in the main
! 	@ division loop.  Continue shifting until the divisor is 
! 	@ larger than the dividend.
! 	cmp	divisor, #0x10000000
! 	cmpcc	divisor, dividend
! 	movcc	divisor, divisor, lsl #4
! 	movcc	curbit, curbit, lsl #4
! 	bcc	Loop1
  
- Lbignum:
- 	@ For very big divisors, we must shift it a bit at a time, or
- 	@ we will be in danger of overflowing.
- 	cmp	divisor, #0x80000000
- 	cmpcc	divisor, dividend
- 	movcc	divisor, divisor, lsl #1
- 	movcc	curbit, curbit, lsl #1
- 	bcc	Lbignum
- 
- Loop3:
- 	@ Test for possible subtractions, and note which bits
- 	@ are done in the result.  On the final pass, this may subtract
- 	@ too much from the dividend, but the result will be ok, since the
- 	@ "bit" will have been shifted out at the bottom.
- 	cmp	dividend, divisor
- 	subcs	dividend, dividend, divisor
- 	orrcs	result, result, curbit
- 	cmp	dividend, divisor, lsr #1
- 	subcs	dividend, dividend, divisor, lsr #1
- 	orrcs	result, result, curbit, lsr #1
- 	cmp	dividend, divisor, lsr #2
- 	subcs	dividend, dividend, divisor, lsr #2
- 	orrcs	result, result, curbit, lsr #2
- 	cmp	dividend, divisor, lsr #3
- 	subcs	dividend, dividend, divisor, lsr #3
- 	orrcs	result, result, curbit, lsr #3
- 	cmp	dividend, #0			@ Early termination?
- 	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
- 	movne	divisor, divisor, lsr #4
- 	bne	Loop3
- Lgot_result:
  	mov	r0, result
  	cmp	ip, #0
! 	rsbmi	r0, r0, #0
  	RET	
  
  #endif /* ARM version */
--- 536,553 ----
  	mov	curbit, #1
  	mov	result, #0
  	cmp	divisor, #0
! 	rsbMI	divisor, divisor, #0		@ Loops below use unsigned.
! 	bEQ	Ldiv0
  	cmp	dividend, #0
! 	rsbMI	dividend, dividend, #0
  	cmp	dividend, divisor
! 	bLO	Lgot_result
  
! 	ARM_DIV_MOD_BODY 0
  	
  	mov	r0, result
  	cmp	ip, #0
! 	rsbMI	r0, r0, #0
  	RET	
  
  #endif /* ARM version */
*************** Lgot_result:
*** 699,940 ****
  /* ------------------------------------------------------------------------ */
  #ifdef L_modsi3
  
- dividend	.req	r0
- divisor		.req	r1
- overdone	.req	r2
- curbit		.req	r3
- ip		.req	r12
- sp		.req	r13
- lr		.req	r14
- pc		.req	r15
- 	
  	FUNC_START modsi3
  
  #ifdef __thumb__
  
  	mov	curbit, #1
  	cmp	divisor, #0
! 	beq	Ldiv0
! 	Bpl	Over1
  	neg	divisor, divisor		@ Loops below use unsigned.
! Over1:	
  	push	{ work }
  	@ Need to save the sign of the dividend, unfortunately, we need
! 	@ ip later on.  Must do this after saving the original value of
  	@ the work register, because we will pop this value off first.
  	push	{ dividend }
  	cmp	dividend, #0
! 	Bpl	Over2
  	neg	dividend, dividend
! Over2:	
  	cmp	dividend, divisor
! 	bcc	Lgot_result
! 	mov	work, #1
! 	lsl	work, #28
! Loop1:
! 	@ Unless the divisor is very big, shift it up in multiples of
! 	@ four bits, since this is the amount of unwinding in the main
! 	@ division loop.  Continue shifting until the divisor is 
! 	@ larger than the dividend.
! 	cmp	divisor, work
! 	bcs	Lbignum
! 	cmp	divisor, dividend
! 	bcs	Lbignum
! 	lsl	divisor, #4
! 	lsl	curbit, #4
! 	b	Loop1
  
! Lbignum:
! 	@ Set work to 0x80000000
! 	lsl	work, #3
! Loop2:
! 	@ For very big divisors, we must shift it a bit at a time, or
! 	@ we will be in danger of overflowing.
! 	cmp	divisor, work
! 	bcs	Loop3
! 	cmp	divisor, dividend
! 	bcs	Loop3
! 	lsl	divisor, #1
! 	lsl	curbit, #1
! 	b	Loop2
  
- Loop3:
- 	@ Test for possible subtractions.  On the final pass, this may 
- 	@ subtract too much from the dividend, so keep track of which
- 	@ subtractions are done, we can fix them up afterwards...
- 	mov	overdone, #0
- 	cmp	dividend, divisor
- 	bcc	Over3
- 	sub	dividend, dividend, divisor
- Over3:
- 	lsr	work, divisor, #1
- 	cmp	dividend, work
- 	bcc	Over4
- 	sub	dividend, dividend, work
- 	mov	ip, curbit
- 	mov	work, #1
- 	ror	curbit, work
- 	orr	overdone, curbit
- 	mov	curbit, ip
- Over4:
- 	lsr	work, divisor, #2
- 	cmp	dividend, work
- 	bcc	Over5
- 	sub	dividend, dividend, work
- 	mov	ip, curbit
- 	mov	work, #2
- 	ror	curbit, work
- 	orr	overdone, curbit
- 	mov	curbit, ip
- Over5:
- 	lsr	work, divisor, #3
- 	cmp	dividend, work
- 	bcc	Over6
- 	sub	dividend, dividend, work
- 	mov	ip, curbit
- 	mov	work, #3
- 	ror	curbit, work
- 	orr	overdone, curbit
- 	mov	curbit, ip
- Over6:
- 	mov	ip, curbit
- 	cmp	dividend, #0			@ Early termination?
- 	beq	Over7
- 	lsr	curbit, #4			@ No, any more bits to do?
- 	beq	Over7
- 	lsr	divisor, #4
- 	b	Loop3
- 
- Over7:	
- 	@ Any subtractions that we should not have done will be recorded in
- 	@ the top three bits of "overdone".  Exactly which were not needed
- 	@ are governed by the position of the bit, stored in ip.
- 	mov	work, #0xe
- 	lsl	work, #28
- 	and	overdone, work
- 	beq	Lgot_result
- 	
- 	@ If we terminated early, because dividend became zero, then the 
- 	@ bit in ip will not be in the bottom nibble, and we should not
- 	@ perform the additions below.  We must test for this though
- 	@ (rather relying upon the TSTs to prevent the additions) since
- 	@ the bit in ip could be in the top two bits which might then match
- 	@ with one of the smaller RORs.
- 	mov	curbit, ip
- 	mov	work, #0x7
- 	tst	curbit, work
- 	beq	Lgot_result
- 	
- 	mov	curbit, ip
- 	mov	work, #3
- 	ror	curbit, work
- 	tst	overdone, curbit
- 	beq	Over8
- 	lsr	work, divisor, #3
- 	add	dividend, dividend, work
- Over8:
- 	mov	curbit, ip
- 	mov	work, #2
- 	ror	curbit, work
- 	tst	overdone, curbit
- 	beq	Over9
- 	lsr	work, divisor, #2
- 	add	dividend, dividend, work
- Over9:
- 	mov	curbit, ip
- 	mov	work, #1
- 	ror	curbit, work
- 	tst	overdone, curbit
- 	beq	Lgot_result
- 	lsr	work, divisor, #1
- 	add	dividend, dividend, work
- Lgot_result:
  	pop	{ work }
  	cmp	work, #0
! 	bpl	Over10
  	neg	dividend, dividend
! Over10:
  	pop	{ work }
  	RET	
  
  #else /* ARM version.  */
  	
- 	mov	curbit, #1
  	cmp	divisor, #0
! 	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
! 	beq	Ldiv0
  	@ Need to save the sign of the dividend, unfortunately, we need
  	@ ip later on; this is faster than pushing lr and using that.
  	str	dividend, [sp, #-4]!
! 	cmp	dividend, #0
! 	rsbmi	dividend, dividend, #0
! 	cmp	dividend, divisor
! 	bcc	Lgot_result
! 
! Loop1:
! 	@ Unless the divisor is very big, shift it up in multiples of
! 	@ four bits, since this is the amount of unwinding in the main
! 	@ division loop.  Continue shifting until the divisor is 
! 	@ larger than the dividend.
! 	cmp	divisor, #0x10000000
! 	cmpcc	divisor, dividend
! 	movcc	divisor, divisor, lsl #4
! 	movcc	curbit, curbit, lsl #4
! 	bcc	Loop1
! 
! Lbignum:
! 	@ For very big divisors, we must shift it a bit at a time, or
! 	@ we will be in danger of overflowing.
! 	cmp	divisor, #0x80000000
! 	cmpcc	divisor, dividend
! 	movcc	divisor, divisor, lsl #1
! 	movcc	curbit, curbit, lsl #1
! 	bcc	Lbignum
  
! Loop3:
! 	@ Test for possible subtractions.  On the final pass, this may 
! 	@ subtract too much from the dividend, so keep track of which
! 	@ subtractions are done, we can fix them up afterwards...
! 	mov	overdone, #0
! 	cmp	dividend, divisor
! 	subcs	dividend, dividend, divisor
! 	cmp	dividend, divisor, lsr #1
! 	subcs	dividend, dividend, divisor, lsr #1
! 	orrcs	overdone, overdone, curbit, ror #1
! 	cmp	dividend, divisor, lsr #2
! 	subcs	dividend, dividend, divisor, lsr #2
! 	orrcs	overdone, overdone, curbit, ror #2
! 	cmp	dividend, divisor, lsr #3
! 	subcs	dividend, dividend, divisor, lsr #3
! 	orrcs	overdone, overdone, curbit, ror #3
! 	mov	ip, curbit
! 	cmp	dividend, #0			@ Early termination?
! 	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
! 	movne	divisor, divisor, lsr #4
! 	bne	Loop3
  
- 	@ Any subtractions that we should not have done will be recorded in
- 	@ the top three bits of "overdone".  Exactly which were not needed
- 	@ are governed by the position of the bit, stored in ip.
- 	ands	overdone, overdone, #0xe0000000
- 	@ If we terminated early, because dividend became zero, then the 
- 	@ bit in ip will not be in the bottom nibble, and we should not
- 	@ perform the additions below.  We must test for this though
- 	@ (rather relying upon the TSTs to prevent the additions) since
- 	@ the bit in ip could be in the top two bits which might then match
- 	@ with one of the smaller RORs.
- 	tstNE	ip, #0x7
- 	beq	Lgot_result
- 	tst	overdone, ip, ror #3
- 	addne	dividend, dividend, divisor, lsr #3
- 	tst	overdone, ip, ror #2
- 	addne	dividend, dividend, divisor, lsr #2
- 	tst	overdone, ip, ror #1
- 	addne	dividend, dividend, divisor, lsr #1
- Lgot_result:
  	ldr	ip, [sp], #4
  	cmp	ip, #0
! 	rsbmi	dividend, dividend, #0
  	RET	
  
  #endif /* ARM version */
--- 558,614 ----
  /* ------------------------------------------------------------------------ */
  #ifdef L_modsi3
  
  	FUNC_START modsi3
  
  #ifdef __thumb__
  
  	mov	curbit, #1
  	cmp	divisor, #0
! 	bEQ	Ldiv0
! 	bPL	Lover10
  	neg	divisor, divisor		@ Loops below use unsigned.
! Lover10:
  	push	{ work }
  	@ Need to save the sign of the dividend, unfortunately, we need
! 	@ work later on.  Must do this after saving the original value of
  	@ the work register, because we will pop this value off first.
  	push	{ dividend }
  	cmp	dividend, #0
! 	bPL	Lover11
  	neg	dividend, dividend
! Lover11:
  	cmp	dividend, divisor
! 	bLO	Lgot_result
  
! 	THUMB_DIV_MOD_BODY 1
  		
  	pop	{ work }
  	cmp	work, #0
! 	bPL	Lover12
  	neg	dividend, dividend
! Lover12:
  	pop	{ work }
  	RET	
  
  #else /* ARM version.  */
  	
  	cmp	divisor, #0
! 	rsbMI	divisor, divisor, #0		@ Loops below use unsigned.
! 	bEQ	Ldiv0
  	@ Need to save the sign of the dividend, unfortunately, we need
  	@ ip later on; this is faster than pushing lr and using that.
  	str	dividend, [sp, #-4]!
! 	cmp	dividend, #0			@ Test dividend against zero
! 	rsbMI	dividend, dividend, #0		@ If negative make positive
! 	cmp	dividend, divisor		@ else if zero return zero
! 	bLO	Lgot_result			@ if smaller return dividend
! 	mov	curbit, #1
  
! 	ARM_DIV_MOD_BODY 1
  
  	ldr	ip, [sp], #4
  	cmp	ip, #0
! 	rsbMI	dividend, dividend, #0
  	RET	
  
  #endif /* ARM version */
*************** _arm_return:		
*** 1105,1108 ****
  	SIZE	(_interwork_call_via_lr)
  	
  #endif /* L_interwork_call_via_rX */
- 
--- 779,781 ----
Follow-Ups:
- Re: ARM: lib1funcs.asm tidy up
  - From: Richard Earnshaw
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]