This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
XScale optimised lib1funcs.asm
- From: Ben Elliston <bje at wasabisystems dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: scw at wasabisystems dot com
- Date: Fri, 5 Sep 2003 10:24:48 +1000
- Subject: XScale optimised lib1funcs.asm
- Organisation: Wasabi Systems Inc.
I am contributing the following patch on behalf of Steve Woodford at
Wasabi Systems. It adds ARMv5 optimised division and modulo routines
to lib1funcs.asm. Regression tested with --target=xscale-elf,
including a full testsuite run.
Okay to commit?
2003-09-05 Steve Woodford <scw@wasabisystems.com>
* config/arm/lib1funcs.asm (udivsi3): Optimise for XScale.
(umodsi3, divsi3, modsi3): Likewise.
Index: gcc/config/arm/lib1funcs.asm
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/lib1funcs.asm,v
retrieving revision 1.25
diff -u -r1.25 lib1funcs.asm
--- gcc/config/arm/lib1funcs.asm 30 Aug 2003 15:55:17 -0000 1.25
+++ gcc/config/arm/lib1funcs.asm 5 Sep 2003 00:22:23 -0000
@@ -495,7 +495,49 @@
RET
#else /* ARM version. */
-
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ subs r3, dividend, divisor
+ blo LSYM(Lzero)
+ cmp r3, divisor
+ blo LSYM(Lone)
+ movs r2, dividend
+ clz r3, dividend
+ clz r0, divisor
+ sub r3, r0, r3
+#ifndef __OPTIMIZE_SIZE__
+ rsbs r3, r3, #31
+ addne r3, r3, r3, lsl #1
+ mov r0, #0
+ addne pc, pc, r3, lsl #2
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp r2, divisor, lsl #shift
+ adc r0, r0, r0
+ subcs r2, r2, divisor, lsl #shift
+ .endr
+#else
+ mov r0, #0
+LSYM(Loop):
+ cmp r2, divisor, lsl r3
+ adc r0, r0, r0
+ subcs r2, r2, divisor, lsl r3
+ subs r3, r3, #1
+ bpl LSYM(Loop)
+#endif
+ RET
+
+LSYM(Lzero):
+ mov r0, #0
+ RET
+
+LSYM(Lone):
+ mov r0, #1
+ RET
+#else
cmp divisor, #0
beq LSYM(Ldiv0)
mov curbit, #1
@@ -507,7 +549,7 @@
mov r0, result
RET
-
+#endif
#endif /* ARM version */
DIV_FUNC_END udivsi3
@@ -536,7 +578,36 @@
RET
#else /* ARM version. */
-
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ subs r3, dividend, divisor
+ RETc(lo)
+ cmp r3, divisor
+ sublo r0, dividend, divisor
+ RETc(lo)
+ clz r3, dividend
+ clz r2, divisor
+ sub r3, r2, r3
+#ifndef __OPTIMIZE_SIZE__
+ rsbs r3, r3, #31
+ addne pc, pc, r3, lsl #3
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp dividend, divisor, lsl #shift
+ subcs dividend, dividend, divisor, lsl #shift
+ .endr
+#else
+LSYM(Loop):
+ cmp dividend, divisor, lsl r3
+ subcs dividend, dividend, divisor, lsl r3
+ subs r3, r3, #1
+ bpl LSYM(Loop)
+#endif
+ RET
+#else
cmp divisor, #0
beq LSYM(Ldiv0)
cmp divisor, #1
@@ -548,7 +619,7 @@
ARM_DIV_MOD_BODY 1
RET
-
+#endif
#endif /* ARM version. */
DIV_FUNC_END umodsi3
@@ -592,7 +663,56 @@
RET
#else /* ARM version. */
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ movs r2, dividend
+ rsbmi r2, dividend, #0
+ eors ip, dividend, divisor, asr #32
+ rsbcs divisor, divisor, #0
+ cmp r2, divisor
+ blo LSYM(Lzero)
+ cmp r2, divisor, lsl #1
+ blo LSYM(Lone)
+ clz r3, r2
+ clz r0, divisor
+ sub r3, r0, r3
+#ifndef __OPTIMIZE_SIZE__
+ rsbs r3, r3, #31
+ addne r3, r3, r3, lsl #1
+ mov r0, #0
+ addne pc, pc, r3, lsl #2
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp r2, divisor, lsl #shift
+ adc r0, r0, r0
+ subcs r2, r2, divisor, lsl #shift
+ .endr
+#else
+ mov r0, #0
+LSYM(Loop):
+ cmp r2, divisor, lsl r3
+ adc r0, r0, r0
+ subcs r2, r2, divisor, lsl r3
+ subs r3, r3, #1
+ bpl LSYM(Loop)
+#endif
+ movs ip, ip, lsl #1
+ rsbcs r0, r0, #0
+ RET
+LSYM(Lzero):
+ mov r0, #0
+ RET
+
+LSYM(Lone):
+ mov r0, #1
+ movs ip, ip, lsl #1
+ rsbcs r0, r0, #0
+ RET
+#else
eor ip, dividend, divisor @ Save the sign of the result.
mov curbit, #1
mov result, #0
@@ -610,7 +730,7 @@
cmp ip, #0
rsbmi r0, r0, #0
RET
-
+#endif
#endif /* ARM version */
DIV_FUNC_END divsi3
@@ -652,7 +772,42 @@
RET
#else /* ARM version. */
-
+#if defined __ARM_ARCH_5__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__
+ ands ip, dividend, #0x80000000
+ rsbmi dividend, dividend, #0
+ cmp divisor, #0
+ beq LSYM(Ldiv0)
+ rsbmi divisor, divisor, #0
+ cmp dividend, divisor
+ blo LSYM(Ldone)
+ cmp dividend, divisor, lsl #1
+ sublo r0, dividend, divisor
+ blo LSYM(Ldone)
+ clz r3, dividend
+ clz r2, divisor
+ sub r3, r2, r3
+#ifndef __OPTIMIZE_SIZE__
+ rsbs r3, r3, #31
+ addne pc, pc, r3, lsl #3
+ nop
+ .set shift, 32
+ .rept 32
+ .set shift, shift - 1
+ cmp dividend, divisor, lsl #shift
+ subcs dividend, dividend, divisor, lsl #shift
+ .endr
+#else
+LSYM(Loop):
+ cmp dividend, divisor, lsl r3
+ subcs dividend, dividend, divisor, lsl r3
+ subs r3, r3, #1
+ bpl LSYM(Loop)
+#endif
+LSYM(Ldone):
+ movs ip, ip, lsl #1
+ rsbcs r0, r0, #0
+ RET
+#else
cmp divisor, #0
rsbmi divisor, divisor, #0 @ Loops below use unsigned.
beq LSYM(Ldiv0)
@@ -671,7 +826,7 @@
cmp ip, #0
rsbmi dividend, dividend, #0
RET
-
+#endif
#endif /* ARM version */
DIV_FUNC_END modsi3