This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

XScale optimised lib1funcs.asm


I am contributing the following patch on behalf of Steve Woodford at
Wasabi Systems.  It adds ARMv5 optimised division and modulo routines
to lib1funcs.asm.  It has been regression tested with a full testsuite
run for --target=xscale-elf.

Okay to commit?

2003-09-05  Steve Woodford  <scw@wasabisystems.com>

	* config/arm/lib1funcs.asm (udivsi3): Optimise for XScale.
	(umodsi3, divsi3, modsi3): Likewise.

Index: gcc/config/arm/lib1funcs.asm
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/lib1funcs.asm,v
retrieving revision 1.25
diff -u -r1.25 lib1funcs.asm
--- gcc/config/arm/lib1funcs.asm	30 Aug 2003 15:55:17 -0000	1.25
+++ gcc/config/arm/lib1funcs.asm	5 Sep 2003 00:22:23 -0000
@@ -495,7 +495,49 @@
 	RET
 
 #else /* ARM version.  */
-	
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+	cmp	divisor, #0			@ ARMv5 unsigned divide: a zero divisor goes to Ldiv0.
+	beq	LSYM(Ldiv0)
+	subs	r3, dividend, divisor		@ r3 = dividend - divisor.
+	blo	LSYM(Lzero)			@ dividend < divisor (unsigned): quotient is 0.
+	cmp	r3, divisor
+	blo	LSYM(Lone)			@ dividend - divisor < divisor: quotient is 1.
+	movs	r2, dividend			@ r2 = running remainder, starts as the dividend.
+	clz	r3, dividend
+	clz	r0, divisor			@ NOTE(review): r0 reused as scratch, dividend presumably aliases r0 (hence the copy in r2) - confirm.
+	sub	r3, r0, r3			@ r3 = clz(divisor) - clz(dividend) = largest shift worth trying.
+#ifndef	__OPTIMIZE_SIZE__
+	rsbs	r3, r3, #31			@ r3 = 31 - shift = number of leading unrolled steps to skip.
+	addne	r3, r3, r3, lsl #1		@ r3 *= 3, because each step below is 3 instructions.
+	mov	r0, #0				@ Clear the quotient (no S suffix, so the rsbs flags survive).
+	addne	pc, pc, r3, lsl #2		@ Computed jump: skip 12 bytes per skipped step.
+	nop					@ Padding: pc reads as this-insn + 8 in ARM state, i.e. the first step below.
+	.set	shift, 32
+	.rept	32				@ One step per shift amount, 31 down to 0.
+	.set	shift, shift - 1
+	cmp	r2, divisor, lsl #shift		@ C = (remainder >= divisor << shift).
+	adc	r0, r0, r0			@ Shift that bit into the quotient.
+	subcs	r2, r2, divisor, lsl #shift	@ Subtract when the quotient bit was 1.
+	.endr
+#else
+	mov	r0, #0				@ Clear the quotient.
+LSYM(Loop):
+	cmp	r2, divisor, lsl r3		@ C = (remainder >= divisor << r3).
+	adc	r0, r0, r0			@ Shift that bit into the quotient.
+	subcs	r2, r2, divisor, lsl r3		@ Subtract when the quotient bit was 1.
+	subs	r3, r3, #1
+	bpl	LSYM(Loop)			@ Repeat for every shift from r3 down to 0.
+#endif
+	RET
+
+LSYM(Lzero):
+	mov	r0, #0				@ Quotient 0.
+	RET
+
+LSYM(Lone):
+	mov	r0, #1				@ Quotient 1.
+	RET
+#else	
 	cmp	divisor, #0
 	beq	LSYM(Ldiv0)
 	mov	curbit, #1
@@ -507,7 +549,7 @@
 	
 	mov	r0, result
 	RET	
-
+#endif
 #endif /* ARM version */
 
 	DIV_FUNC_END udivsi3
@@ -536,7 +578,36 @@
 	RET
 	
 #else  /* ARM version.  */
-	
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+	cmp	divisor, #0			@ ARMv5 unsigned modulo: a zero divisor goes to Ldiv0.
+	beq	LSYM(Ldiv0)
+	subs	r3, dividend, divisor		@ r3 = dividend - divisor.
+	RETc(lo)				@ dividend < divisor: presumably returns with dividend untouched as the remainder - confirm macro.
+	cmp	r3, divisor
+	sublo	r0, dividend, divisor		@ dividend - divisor < divisor: one subtraction gives the remainder.
+	RETc(lo)
+	clz	r3, dividend
+	clz	r2, divisor
+	sub	r3, r2, r3			@ r3 = clz(divisor) - clz(dividend) = largest shift worth trying.
+#ifndef	__OPTIMIZE_SIZE__
+	rsbs	r3, r3, #31			@ r3 = 31 - shift = number of leading unrolled steps to skip.
+	addne	pc, pc, r3, lsl #3		@ Computed jump: skip 8 bytes (2 instructions) per skipped step.
+	nop					@ Padding: pc reads as this-insn + 8 in ARM state, i.e. the first step below.
+	.set	shift, 32
+	.rept	32				@ One step per shift amount, 31 down to 0.
+	.set	shift, shift - 1
+	cmp	dividend, divisor, lsl #shift	@ C = (dividend >= divisor << shift).
+	subcs	dividend, dividend, divisor, lsl #shift	@ Reduce the dividend in place when it fits.
+	.endr
+#else
+LSYM(Loop):
+	cmp	dividend, divisor, lsl r3	@ C = (dividend >= divisor << r3).
+	subcs	dividend, dividend, divisor, lsl r3	@ Reduce the dividend in place when it fits.
+	subs	r3, r3, #1
+	bpl	LSYM(Loop)			@ Repeat for every shift from r3 down to 0.
+#endif
+	RET
+#else
 	cmp	divisor, #0
 	beq	LSYM(Ldiv0)
 	cmp     divisor, #1
@@ -548,7 +619,7 @@
 	ARM_DIV_MOD_BODY 1
 	
 	RET	
-
+#endif
 #endif /* ARM version.  */
 	
 	DIV_FUNC_END umodsi3
@@ -592,7 +663,56 @@
 	RET
 
 #else /* ARM version.  */
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+	cmp	divisor, #0			@ ARMv5 signed divide: a zero divisor goes to Ldiv0.
+	beq	LSYM(Ldiv0)
+	movs	r2, dividend			@ r2 = dividend, N = its sign.
+	rsbmi	r2, dividend, #0		@ r2 = abs(dividend).
+	eors	ip, dividend, divisor, asr #32	@ ip bit 31 = sign of the quotient, C = sign bit of divisor.
+	rsbcs	divisor, divisor, #0		@ divisor = abs(divisor).
+	cmp	r2, divisor
+	blo	LSYM(Lzero)			@ abs(dividend) < abs(divisor): quotient is 0.
+	cmp	r2, divisor, lsl #1
+	blo	LSYM(Lone)			@ abs(dividend) < (abs(divisor) << 1): magnitude is 1.
+	clz	r3, r2
+	clz	r0, divisor			@ r0 is free as scratch, the magnitude of the dividend lives in r2.
+	sub	r3, r0, r3			@ r3 = clz(divisor) - clz(r2) = largest shift worth trying.
+#ifndef	__OPTIMIZE_SIZE__
+	rsbs	r3, r3, #31			@ r3 = 31 - shift = number of leading unrolled steps to skip.
+	addne	r3, r3, r3, lsl #1		@ r3 *= 3, because each step below is 3 instructions.
+	mov	r0, #0				@ Clear the quotient (no S suffix, so the rsbs flags survive).
+	addne	pc, pc, r3, lsl #2		@ Computed jump: skip 12 bytes per skipped step.
+	nop					@ Padding: pc reads as this-insn + 8 in ARM state, i.e. the first step below.
+	.set	shift, 32
+	.rept	32				@ One step per shift amount, 31 down to 0.
+	.set	shift, shift - 1
+	cmp	r2, divisor, lsl #shift		@ C = (remainder >= divisor << shift).
+	adc	r0, r0, r0			@ Shift that bit into the quotient.
+	subcs	r2, r2, divisor, lsl #shift	@ Subtract when the quotient bit was 1.
+	.endr
+#else
+	mov	r0, #0				@ Clear the quotient.
+LSYM(Loop):
+	cmp	r2, divisor, lsl r3		@ C = (remainder >= divisor << r3).
+	adc	r0, r0, r0			@ Shift that bit into the quotient.
+	subcs	r2, r2, divisor, lsl r3		@ Subtract when the quotient bit was 1.
+	subs	r3, r3, #1
+	bpl	LSYM(Loop)			@ Repeat for every shift from r3 down to 0.
+#endif
+	movs    ip, ip, lsl #1			@ C = bit 31 of ip, the quotient sign saved above.
+	rsbcs   r0, r0, #0			@ Negate the quotient when the operand signs differed.
+	RET
 	
+LSYM(Lzero):
+	mov	r0, #0				@ Quotient 0, no sign fix-up needed.
+	RET
+
+LSYM(Lone):
+	mov	r0, #1				@ Quotient magnitude 1.
+	movs    ip, ip, lsl #1			@ C = bit 31 of ip, which the branches here set to the quotient sign.
+	rsbcs   r0, r0, #0			@ Return -1 when that sign bit is set.
+	RET
+#else
 	eor	ip, dividend, divisor		@ Save the sign of the result.
 	mov	curbit, #1
 	mov	result, #0
@@ -610,7 +730,7 @@
 	cmp	ip, #0
 	rsbmi	r0, r0, #0
 	RET	
-
+#endif
 #endif /* ARM version */
 	
 	DIV_FUNC_END divsi3
@@ -652,7 +772,42 @@
 	RET	
 
 #else /* ARM version.  */
-	
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+	ands	ip, dividend, #0x80000000	@ ARMv5 signed modulo: ip = sign bit of dividend, reapplied to the result at Ldone.
+	rsbmi	dividend, dividend, #0		@ dividend = abs(dividend).
+	cmp	divisor, #0
+	beq	LSYM(Ldiv0)			@ Zero divisor. Note dividend was already replaced by its absolute value.
+	rsbmi	divisor, divisor, #0		@ divisor = abs(divisor), MI comes from the cmp above.
+	cmp	dividend, divisor
+	blo	LSYM(Ldone)			@ abs(dividend) < abs(divisor): it already is the remainder magnitude.
+	cmp	dividend, divisor, lsl #1
+	sublo	r0, dividend, divisor		@ Less than (divisor << 1): one subtraction gives the remainder.
+	blo	LSYM(Ldone)
+	clz	r3, dividend
+	clz	r2, divisor
+	sub	r3, r2, r3			@ r3 = clz(divisor) - clz(dividend) = largest shift worth trying.
+#ifndef	__OPTIMIZE_SIZE__
+	rsbs	r3, r3, #31			@ r3 = 31 - shift = number of leading unrolled steps to skip.
+	addne	pc, pc, r3, lsl #3		@ Computed jump: skip 8 bytes (2 instructions) per skipped step.
+	nop					@ Padding: pc reads as this-insn + 8 in ARM state, i.e. the first step below.
+	.set	shift, 32
+	.rept	32				@ One step per shift amount, 31 down to 0.
+	.set	shift, shift - 1
+	cmp	dividend, divisor, lsl #shift	@ C = (dividend >= divisor << shift).
+	subcs	dividend, dividend, divisor, lsl #shift	@ Reduce the dividend in place when it fits.
+	.endr
+#else
+LSYM(Loop):
+	cmp	dividend, divisor, lsl r3	@ C = (dividend >= divisor << r3).
+	subcs	dividend, dividend, divisor, lsl r3	@ Reduce the dividend in place when it fits.
+	subs	r3, r3, #1
+	bpl	LSYM(Loop)			@ Repeat for every shift from r3 down to 0.
+#endif
+LSYM(Ldone):
+	movs    ip, ip, lsl #1			@ C = saved sign bit of the original dividend.
+	rsbcs   r0, r0, #0			@ Negate r0 when the dividend was negative. NOTE(review): assumes dividend aliases r0 - confirm.
+	RET
+#else
 	cmp	divisor, #0
 	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
 	beq	LSYM(Ldiv0)
@@ -671,7 +826,7 @@
 	cmp	ip, #0
 	rsbmi	dividend, dividend, #0
 	RET	
-
+#endif
 #endif /* ARM version */
 	
 	DIV_FUNC_END modsi3


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]