[PATCH 2/3, ARM, libgcc, ping6] Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc

Thomas Preud'homme thomas.preudhomme@arm.com
Thu Apr 30 07:43:00 GMT 2015


Here is an updated patch that prefixes local symbols with __ for more safety.
They appear in the symtab as local, so this is not strictly necessary, but one
can never be too cautious. Being local, they also do not generate any PLT entry.
They appear in the symtab only because the jumps go from one section to another
(which is the whole purpose of this patch) and thus need a static relocation.

I hope this revised version addresses all your concerns.

ChangeLog entry is unchanged:

*** gcc/libgcc/ChangeLog ***

2015-04-30   Tony Wang <tony.wang@arm.com>

        * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
        * config/arm/ieee754-df.S: Likewise.

diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index c1468dc..39b0028 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
 
 #ifdef L_arm_muldivdf3
 
-ARM_FUNC_START muldf3
+ARM_FUNC_START muldf3, function_section
 ARM_FUNC_ALIAS aeabi_dmul muldf3
 	do_push	{r4, r5, r6, lr}
 
@@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	COND(and,s,ne)	r5, ip, yh, lsr #20
 	teqne	r4, ip
 	teqne	r5, ip
-	bleq	LSYM(Lml_s)
+	bleq	__Lml_s
 
 	@ Add exponents together
 	add	r4, r4, r5
@@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	lr, #0x80000000
@@ -716,9 +716,12 @@ LSYM(Lml_1):
 	mov	lr, #0
 	subs	r4, r4, #1
 
-LSYM(Lml_u):
+	FUNC_END aeabi_dmul
+	FUNC_END muldf3
+
+ARM_SYM_START __Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	__Lml_o
 
 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r4, #(53 + 1)
@@ -778,10 +781,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	xl, xl, r3, lsr #31
 	RETLDM	"r4, r5, r6"
+	SYM_END __Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START __Lml_d
 	teq	r4, #0
 	bne	2f
 	and	r6, xh, #0x80000000
@@ -804,8 +808,9 @@ LSYM(Lml_d):
 	beq	3b
 	orr	yh, yh, r6
 	RET
+	SYM_END __Lml_d
 
-LSYM(Lml_s):
+ARM_SYM_START __Lml_s
 	@ Isolate the INF and NAN cases away
 	teq	r4, ip
 	and	r5, ip, yh, lsr #20
@@ -817,10 +822,11 @@ LSYM(Lml_s):
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	__Lml_d
+	SYM_END __Lml_s
 
 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START __Lml_z
 	eor	xh, xh, yh
 	and	xh, xh, #0x80000000
 	mov	xl, #0
@@ -832,41 +838,42 @@ LSYM(Lml_z):
 	moveq	xl, yl
 	moveq	xh, yh
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	__Lml_n		@ 0 * INF or INF * 0 -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r6, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	__Lml_n		@ NAN * <anything> -> NAN
 1:	teq	r5, ip
-	bne	LSYM(Lml_i)
+	bne	__Lml_i
 	orrs	r6, yl, yh, lsl #12
 	do_it	ne, t
 	movne	xl, yl
 	movne	xh, yh
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	__Lml_n		@ <anything> * NAN -> NAN
+	SYM_END __Lml_z
 
 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START __Lml_i
 	eor	xh, xh, yh
+	SYM_END __Lml_i
 
 	@ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
+ARM_SYM_START __Lml_o
 	and	xh, xh, #0x80000000
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f00000
 	mov	xl, #0
 	RETLDM	"r4, r5, r6"
+	SYM_END __Lml_o
 
 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START __Lml_n
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f80000
 	RETLDM	"r4, r5, r6"
+	SYM_END __Lml_n
 
-	FUNC_END aeabi_dmul
-	FUNC_END muldf3
-
-ARM_FUNC_START divdf3
+ARM_FUNC_START divdf3 function_section
 ARM_FUNC_ALIAS aeabi_ddiv divdf3
 	
 	do_push	{r4, r5, r6, lr}
@@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u
 
 	@ Round the result, merge final exponent.
 	subs	ip, r5, yh
@@ -1009,13 +1016,13 @@ LSYM(Ldv_1):
 	orr	xh, xh, #0x00100000
 	mov	lr, #0
 	subs	r4, r4, #1
-	b	LSYM(Lml_u)
+	b	__Lml_u
 
 	@ Result mightt need to be denormalized: put remainder bits
 	@ in lr for rounding considerations.
 LSYM(Ldv_u):
 	orr	lr, r5, r6
-	b	LSYM(Lml_u)
+	b	__Lml_u
 
 	@ One or both arguments is either INF, NAN or zero.
 LSYM(Ldv_s):
@@ -1023,34 +1030,34 @@ LSYM(Ldv_s):
 	teq	r4, ip
 	do_it	eq
 	teqeq	r5, ip
-	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
+	beq	__Lml_n		@ INF/NAN / INF/NAN -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r4, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	__Lml_n		@ NAN / <anything> -> NAN
 	teq	r5, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	__Lml_i		@ INF / <anything> -> INF
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	__Lml_n		@ INF / (INF or NAN) -> NAN
 1:	teq	r5, ip
 	bne	2f
 	orrs	r5, yl, yh, lsl #12
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	__Lml_z		@ <anything> / INF -> 0
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	__Lml_n		@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	__Lml_d
 	@ One or both arguments are 0.
 	orrs	r4, xl, xh, lsl #1
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	__Lml_i		@ <non_zero> / 0 -> INF
 	orrs	r5, yl, yh, lsl #1
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	__Lml_z		@ 0 / <non_zero> -> 0
+	b	__Lml_n		@ 0 / 0 -> NAN
 
 	FUNC_END aeabi_ddiv
 	FUNC_END divdf3
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index bc44d4e..7c2ab8b 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
 
 #ifdef L_arm_muldivsf3
 
-ARM_FUNC_START mulsf3
+ARM_FUNC_START mulsf3, function_section
 ARM_FUNC_ALIAS aeabi_fmul mulsf3
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3
 	COND(and,s,ne)	r3, ip, r1, lsr #23
 	teqne	r2, ip
 	teqne	r3, ip
-	beq	LSYM(Lml_s)
+	beq	__Lml_s
 LSYM(Lml_x):
 
 	@ Add exponents together
@@ -490,7 +490,7 @@ LSYM(Lml_x):
 	@ Apply exponent bias, check for under/overflow.
 	sbc	r2, r2, #127
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	r3, #0x80000000
@@ -518,9 +518,12 @@ LSYM(Lml_1):
 	mov	r3, #0
 	subs	r2, r2, #1
 
-LSYM(Lml_u):
+	FUNC_END aeabi_fmul
+	FUNC_END mulsf3
+
+ARM_SYM_START __Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	__Lml_o
 
 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r2, #(24 + 1)
@@ -540,10 +543,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	r0, r0, ip, lsr #31
 	RET
+	SYM_END __Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START __Lml_d
 	teq	r2, #0
 	and	ip, r0, #0x80000000
 1:	do_it	eq, tt
@@ -561,8 +565,9 @@ LSYM(Lml_d):
 	beq	2b
 	orr	r1, r1, ip
 	b	LSYM(Lml_x)
+	SYM_END __Lml_d
 
-LSYM(Lml_s):
+ARM_SYM_START __Lml_s
 	@ Isolate the INF and NAN cases away
 	and	r3, ip, r1, lsr #23
 	teq	r2, ip
@@ -574,10 +579,11 @@ LSYM(Lml_s):
 	bics	ip, r0, #0x80000000
 	do_it	ne
 	COND(bic,s,ne)	ip, r1, #0x80000000
-	bne	LSYM(Lml_d)
+	bne	__Lml_d
+	SYM_END __Lml_s
 
 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START __Lml_z
 	eor	r0, r0, r1
 	bic	r0, r0, #0x7fffffff
 	RET
@@ -589,39 +595,41 @@ LSYM(Lml_z):
 	moveq	r0, r1
 	teqne	r1, #0x0
 	teqne	r1, #0x80000000
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	__Lml_n		@ 0 * INF or INF * 0 -> NAN
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	__Lml_n		@ NAN * <anything> -> NAN
 1:	teq	r3, ip
-	bne	LSYM(Lml_i)
+	bne	__Lml_i
 	movs	r3, r1, lsl #9
 	do_it	ne
 	movne	r0, r1
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	__Lml_n		@ <anything> * NAN -> NAN
+	SYM_END __Lml_z
 
 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START __Lml_i
 	eor	r0, r0, r1
+	SYM_END __Lml_i
 
 	@ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
+ARM_SYM_START __Lml_o
 	and	r0, r0, #0x80000000
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00800000
 	RET
+	SYM_END __Lml_o
 
 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START __Lml_n
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00c00000
 	RET
+	SYM_END __Lml_n
 
-	FUNC_END aeabi_fmul
-	FUNC_END mulsf3
 
-ARM_FUNC_START divsf3
+ARM_FUNC_START divsf3 function_section
 ARM_FUNC_ALIAS aeabi_fdiv divsf3
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -684,7 +692,7 @@ LSYM(Ldv_x):
 
 	@ Check exponent for under/overflow.
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	r3, r1
@@ -706,7 +714,7 @@ LSYM(Ldv_1):
 	orr	r0, r0, #0x00800000
 	mov	r3, #0
 	subs	r2, r2, #1
-	b	LSYM(Lml_u)
+	b	__Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
@@ -735,17 +743,17 @@ LSYM(Ldv_s):
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	__Lml_n		@ NAN / <anything> -> NAN
 	teq	r3, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	__Lml_i		@ INF / <anything> -> INF
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	__Lml_n		@ INF / (INF or NAN) -> NAN
 1:	teq	r3, ip
 	bne	2f
 	movs	r3, r1, lsl #9
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	__Lml_z		@ <anything> / INF -> 0
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	__Lml_n		@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	bics	ip, r0, #0x80000000
 	do_it	ne
@@ -753,10 +761,10 @@ LSYM(Ldv_s):
 	bne	LSYM(Ldv_d)
 	@ One or both arguments are zero.
 	bics	r2, r0, #0x80000000
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	__Lml_i		@ <non_zero> / 0 -> INF
 	bics	r3, r1, #0x80000000
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	__Lml_z		@ 0 / <non_zero> -> 0
+	b	__Lml_n		@ 0 / 0 -> NAN
 
 	FUNC_END aeabi_fdiv
 	FUNC_END divsf3


Is this ok for trunk?

Best regards,

Thomas




More information about the Gcc-patches mailing list