This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] [1/2] [ARM] [libgcc] Support RTABI half-precision conversion functions.

From: "Hale Wang" <hale dot wang at arm dot com>
To: "'GCC Patches'" <gcc-patches at gcc dot gnu dot org>
Date: Fri, 10 Apr 2015 12:26:54 +0800
Subject: [PATCH] [1/2] [ARM] [libgcc] Support RTABI half-precision conversion functions.
Authentication-results: sourceware.org; auth=none
This patch is used to support the RTABI functions (__aeabi_f2h,
__aeabi_f2h_alt, __aeabi_h2f, __aeabi_h2f_alt) in libgcc.

According to the Run-time ABI for the ARM Architecture, these functions are
allowed to corrupt only the integer core registers permitted to be corrupted
by the [AAPCS] (r0-r3, ip, lr, and CPSR). So we can't simply use the existing
GNU conversion functions to implement them.

This patch divides this issue into two cases:
1. For targets with FP registers, implement these functions in assembly
code. The assembly code is generated by compiling "fp16.c" with the option
'-mfloat-abi=soft', which means no FP registers will be used. Both Thumb
mode and ARM mode versions are supported.
2. For targets without FP registers, use aliases of the GNU functions to
implement these functions.

Another patch will be sent out later to include some test cases (possibly
also support an option to switch between GNU and RTABI versions). 

The GCC build passed. The ARM mode and Thumb mode versions of the conversion
functions link correctly for different ARM targets.
I have modified the GCC source code locally to replace __gnu_h2f with
__aeabi_h2f (and likewise for _f2h) and disabled the hardware instructions for
fp16 conversion (so the software implementation is used); all fp16 regression
tests (for Thumb, Thumb-2 and ARM targets) pass.

Is it OK for stage 1?

libgcc/ChangeLog:

2015-04-10  Hale Wang  <hale.wang@arm.com>

	* config/arm/fp16.c (__aeabi_f2h, __aeabi_f2h_alt, __aeabi_h2f)
	(__aeabi_h2f_alt): New aliases to GNU functions if __SOFTFP__ is
	defined.
	* config/arm/lib1funcs.S (aeabi_fp16-thumb2.S, aeabi_fp16-thumb.S)
	(aeabi_fp16-arm.S): Include new files if __SOFTFP__ is undefined.
	* config/arm/libgcc-bpabi.ver (__aeabi_f2h, __aeabi_f2h_alt)
	(__aeabi_h2f, __aeabi_h2f_alt) : Set versions.
	* config/arm/t-elf (_arm_truncsfhf2, _arm_extendhfsf2) : New asm
	implementations.
	* config/arm/t-symbian (_arm_truncsfhf2, _arm_extendhfsf2) :
	Likewise.
	* config/arm/aeabi_fp16-arm.S (__aeabi_f2h, __aeabi_f2h_alt)
	(__aeabi_h2f, __aeabi_h2f_alt) : New assembly implementations for
	arm instruction set.
	* config/arm/aeabi_fp16-thumb.S (__aeabi_f2h, __aeabi_f2h_alt)
	(__aeabi_h2f, __aeabi_h2f_alt) : New assembly implementations for
	thumb instruction set.
	* config/arm/aeabi_fp16-thumb2.S (__aeabi_f2h, __aeabi_f2h_alt)
	(__aeabi_h2f, __aeabi_h2f_alt) : New assembly implementations for
	thumb2 instruction set.

diff --git a/libgcc/config/arm/aeabi_fp16-arm.S b/libgcc/config/arm/aeabi_fp16-arm.S
new file mode 100644
index 0000000..7838a48
--- /dev/null
+++ b/libgcc/config/arm/aeabi_fp16-arm.S
@@ -0,0 +1,242 @@
+/* Half-precision floating point conversion routines for ARM.
+
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Half-precision conversion routines for arm mode.  */
+
+#ifdef L_arm_truncsfhf2
+
+ARM_FUNC_START truncsfhf2	/* In: r0 = float bits.  Out: r0 = IEEE half bits.  Uses r0-r3, ip, lr.  */
+ARM_FUNC_ALIAS aeabi_f2h truncsfhf2
+	mov	r2, r0, lsr #23
+	and	r2, r2, #255		/* r2 = biased exponent, bits 30:23.  */
+	bic	r3, r0, #-16777216
+	and	r1, r0, #-2147483648	/* r1 = sign bit (bit 31).  */
+	cmp	r2, #255
+	mov	r0, r1, lsr #16		/* r0 = sign moved to bit 15.  */
+	bic	r3, r3, #8388608	/* r3 = 23-bit mantissa, bits 22:0.  */
+	beq	4f			/* Exponent 255: Inf or NaN.  */
+	cmp	r2, #0
+	cmpeq	r3, #0
+	bxeq	lr			/* Zero input: return the sign only.  */
+	sub	ip, r2, #127		/* ip = unbiased exponent.  */
+	cmn	ip, #14
+	str	lr, [sp, #-4]!		/* Save lr; it is used as scratch below.  */
+	orr	r3, r3, #8388608	/* Set bit 23 above the 23-bit mantissa.  */
+	ldrge	r1, 5f			/* Exponent >= -14: mask = 0x1fff.  */
+	bge	1f
+	mvn	r1, #-16777216		/* r1 = 0x00ffffff.  */
+	cmn	ip, #25
+	subge	lr, r2, #102		/* lr = exponent + 25.  */
+	movge	r1, r1, lsr lr		/* mask = 0x00ffffff >> (exponent + 25).  */
+1:
+	ands	lr, r3, r1		/* lr = mantissa bits covered by the mask.  */
+	beq	2f			/* None set: no rounding needed.  */
+	add	r1, r1, #1
+	mov	r1, r1, lsr #1		/* r1 = (mask + 1) >> 1.  */
+	cmp	lr, r1
+	andeq	r1, r3, r1, asl #1	/* Exact tie: increment only if the next bit up is set.  */
+	add	r3, r3, r1		/* Apply the rounding increment.  */
+	cmp	r3, #16777216
+	movcs	r3, r3, lsr #1		/* Mantissa reached bit 24: shift it back ...  */
+	subcs	ip, r2, #126		/* ... and use exponent + 1.  */
+2:
+	cmp	ip, #15
+	orrgt	r0, r0, #31744		/* Exponent > 15: return sign | 0x7c00.  */
+	bgt	3f
+	cmn	ip, #24
+	blt	3f			/* Exponent < -24: return the sign only.  */
+	cmn	ip, #14
+	mvnlt	r2, #13			/* r2 = -14.  */
+	movlt	r1, #0			/* Exponent < -14: exponent field is 0.  */
+	addge	ip, ip, #14
+	movge	ip, ip, asl #26
+	rsblt	r2, ip, r2		/* r2 = -14 - exponent.  */
+	movge	r1, ip, lsr #16		/* r1 = (exponent + 14) << 10, limited to 16 bits.  */
+	movlt	r3, r3, lsr r2		/* Shift the mantissa right by -14 - exponent.  */
+	add	r3, r1, r3, lsr #13	/* Add mantissa >> 13 to the exponent field.  */
+	orr	r0, r0, r3		/* Merge with the sign.  */
+	mov	r3, r0, asl #16
+	mov	r0, r3, lsr #16		/* Keep only the low 16 bits.  */
+3:
+	ldr	lr, [sp], #4		/* Restore lr.  */
+	bx	lr
+4:
+	cmp	r3, #0			/* Mantissa 0 means Inf, anything else is a NaN.  */
+	orrne	r3, r3, #4194304	/* NaN: force bit 22, which lands on 0x0200 below.  */
+	orr	r0, r0, r3, lsr #13	/* sign | (mantissa >> 13).  */
+	orr	r0, r0, #31744		/* Inf: sign | 0x7c00.  NaN: sign | 0x7e00 | payload.  */
+	bx	lr
+
+	.align	2
+5:
+	.word	8191
+	FUNC_END aeabi_f2h
+	FUNC_END truncsfhf2
+
+ARM_FUNC_START aeabi_f2h_alt	/* In: r0 = float bits.  Out: r0 = 16-bit result of the _alt variant.  */
+	mov	r2, r0, lsr #23
+	and	r2, r2, #255		/* r2 = biased exponent, bits 30:23.  */
+	and	r1, r0, #-2147483648
+	cmp	r2, #255
+	mov	r1, r1, lsr #16		/* r1 = sign moved to bit 15.  */
+	beq	4f			/* Exponent 255: return the sign only.  */
+	bic	r3, r0, #-16777216
+	bics	r3, r3, #8388608	/* r3 = 23-bit mantissa; Z is set if it is zero.  */
+	moveq	r0, #1
+	movne	r0, #0			/* r0 = (mantissa == 0).  */
+	cmp	r2, #0
+	movne	r0, #0
+	andeq	r0, r0, #1		/* r0 = (mantissa == 0 && exponent == 0).  */
+	cmp	r0, #0
+	bne	4f			/* Zero input: return the sign only.  */
+	sub	ip, r2, #127		/* ip = unbiased exponent.  */
+	cmn	ip, #14
+	str	lr, [sp, #-4]!		/* Save lr; it is used as scratch below.  */
+	orr	r3, r3, #8388608	/* Set bit 23 above the 23-bit mantissa.  */
+	ldrge	r0, 6f			/* Exponent >= -14: mask = 0x1fff.  */
+	bge	1f
+	mvn	r0, #-16777216		/* r0 = 0x00ffffff.  */
+	cmn	ip, #25
+	subge	lr, r2, #102		/* lr = exponent + 25.  */
+	movge	r0, r0, lsr lr		/* mask = 0x00ffffff >> (exponent + 25).  */
+1:
+	ands	lr, r3, r0		/* lr = mantissa bits covered by the mask.  */
+	beq	2f			/* None set: no rounding needed.  */
+	add	r0, r0, #1
+	mov	r0, r0, lsr #1		/* r0 = (mask + 1) >> 1.  */
+	cmp	lr, r0
+	andeq	r0, r3, r0, asl #1	/* Exact tie: increment only if the next bit up is set.  */
+	add	r3, r3, r0		/* Apply the rounding increment.  */
+	cmp	r3, #16777216
+	movcs	r3, r3, lsr #1		/* Mantissa reached bit 24: shift it back ...  */
+	subcs	ip, r2, #126		/* ... and use exponent + 1.  */
+2:
+	cmp	ip, #16
+	bgt	5f			/* Exponent > 16: return sign | 0x7fff.  */
+	cmn	ip, #24
+	movlt	r0, r1
+	blt	3f			/* Exponent < -24: return the sign only.  */
+	cmn	ip, #14
+	mvnlt	r2, #13			/* r2 = -14.  */
+	movlt	r0, #0			/* Exponent < -14: exponent field is 0.  */
+	addge	ip, ip, #14
+	movge	ip, ip, asl #26
+	rsblt	r2, ip, r2		/* r2 = -14 - exponent.  */
+	movge	r0, ip, lsr #16		/* r0 = (exponent + 14) << 10, limited to 16 bits.  */
+	movlt	r3, r3, lsr r2		/* Shift the mantissa right by -14 - exponent.  */
+	add	r0, r0, r3, lsr #13	/* Add mantissa >> 13 to the exponent field.  */
+	orr	r0, r1, r0		/* Merge with the sign.  */
+	mov	r0, r0, asl #16
+	mov	r0, r0, lsr #16		/* Keep only the low 16 bits.  */
+3:
+	ldr	lr, [sp], #4		/* Restore lr.  */
+	bx	lr
+4:
+	mov	r0, r1			/* Early exit taken before lr was saved.  */
+	bx	lr
+5:
+	mvn	r0, r1, lsr #15		/* r0 = ~(sign >> 15).  */
+	mvn	r0, r0, asl #15		/* r0 = sign | 0x7fff.  */
+	mov	r0, r0, asl #16
+	mov	r0, r0, lsr #16		/* Keep only the low 16 bits.  */
+	ldr	lr, [sp], #4		/* Restore lr.  */
+	bx	lr
+
+	.align	2
+6:
+	.word	8191			/* 0x1fff: mask of the 13 low mantissa bits.  */
+	FUNC_END aeabi_f2h_alt
+
+#endif /* L_arm_truncsfhf2 */
+
+#ifdef L_arm_extendhfsf2
+
+ARM_FUNC_START extendhfsf2	/* In: r0 = half bits.  Out: r0 = float bits.  */
+ARM_FUNC_ALIAS aeabi_h2f extendhfsf2
+	mov	r3, r0, lsr #10
+	and	r3, r3, #31		/* r3 = 5-bit exponent, bits 14:10.  */
+	stmfd	sp!, {r4, r5, r6, lr}	/* r4/r5 and lr must survive the call below.  */
+	cmp	r3, #31
+	and	r5, r0, #32768
+	mov	r0, r0, asl #22
+	mov	r5, r5, asl #16		/* r5 = sign moved to bit 31.  */
+	mov	r4, r0, lsr #22		/* r4 = 10-bit mantissa, bits 9:0.  */
+	beq	4f			/* Exponent 31: Inf or NaN.  */
+	cmp	r3, #0
+	beq	3f			/* Exponent 0: zero or subnormal.  */
+1:
+	add	r2, r3, #112		/* Rebias the exponent by 127 - 15 = 112.  */
+	mov	r3, r4, asl #13		/* Move the mantissa up by 13 bits.  */
+	add	r4, r3, r2, lsl #23	/* Add the exponent field at bit 23.  */
+	orr	r0, r5, r4		/* Merge with the sign.  */
+2:
+	ldmfd	sp!, {r4, r5, r6, lr}
+	bx	lr
+3:
+	cmp	r4, #0
+	moveq	r0, r5
+	beq	2b			/* Zero: return the sign only.  */
+	mov	r0, r4
+	bl	__clzsi2		/* libgcc helper; presumably r0 = leading-zero count of r0.  */
+	sub	r0, r0, #21		/* With that, the shift that puts the top set bit at bit 10.  */
+	mov	r4, r4, asl r0
+	rsb	r3, r0, #0		/* Exponent = -shift.  */
+	b	1b
+4:
+	orr	r0, r5, r4, asl #13	/* sign | (mantissa << 13).  */
+	orr	r5, r0, #2130706432	/* | 0x7f000000.  */
+	orr	r0, r5, #8388608	/* | 0x00800000: exponent field is all ones.  */
+	ldmfd	sp!, {r4, r5, r6, lr}
+	bx	lr
+	FUNC_END aeabi_h2f
+	FUNC_END extendhfsf2
+
+ARM_FUNC_START aeabi_h2f_alt	/* In: r0 = half bits (_alt variant).  Out: r0 = float bits.  */
+	stmfd	sp!, {r4, r5, r6, lr}	/* r4/r5 and lr must survive the call below.  */
+	mov	r4, r0, lsr #10
+	ands	r3, r4, #31		/* r3 = 5-bit exponent; Z is set if it is zero.  */
+	and	r2, r0, #32768
+	mov	r4, r0, asl #22
+	mov	r5, r2, asl #16		/* r5 = sign moved to bit 31.  */
+	mov	r4, r4, lsr #22		/* r4 = 10-bit mantissa, bits 9:0.  */
+	bne	1f			/* Any nonzero exponent, 31 included, is converted directly.  */
+	cmp	r4, #0
+	moveq	r0, r5
+	beq	2f			/* Zero: return the sign only.  */
+	mov	r0, r4
+	bl	__clzsi2		/* libgcc helper; presumably r0 = leading-zero count of r0.  */
+	sub	r0, r0, #21		/* With that, the shift that puts the top set bit at bit 10.  */
+	mov	r4, r4, asl r0
+	rsb	r3, r0, #0		/* Exponent = -shift.  */
+1:
+	add	r2, r3, #112		/* Rebias the exponent by 127 - 15 = 112.  */
+	mov	r3, r4, asl #13		/* Move the mantissa up by 13 bits.  */
+	add	r4, r3, r2, lsl #23	/* Add the exponent field at bit 23.  */
+	orr	r0, r5, r4		/* Merge with the sign.  */
+2:
+	ldmfd	sp!, {r4, r5, r6, lr}
+	bx	lr
+	FUNC_END aeabi_h2f_alt
+
+#endif /* L_arm_extendhfsf2 */
diff --git a/libgcc/config/arm/aeabi_fp16-thumb.S b/libgcc/config/arm/aeabi_fp16-thumb.S
new file mode 100644
index 0000000..068e4f8
--- /dev/null
+++ b/libgcc/config/arm/aeabi_fp16-thumb.S
@@ -0,0 +1,348 @@
+/* Half-precision floating point conversion routines for ARM.
+
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Half-precision conversion routines for thumb1 instruction set.  */
+
+#ifdef L_arm_truncsfhf2
+
+ARM_FUNC_START truncsfhf2	/* In: r0 = float bits.  Out: r0 = IEEE half bits.  */
+ARM_FUNC_ALIAS aeabi_f2h truncsfhf2
+	lsrs	r2, r0, #31
+	lsls	r3, r0, #1
+	lsls	r2, r2, #31		/* r2 = sign bit (bit 31).  */
+	lsls	r0, r0, #9		/* r0 = input << 9 (mantissa at the top).  */
+	push	{r4, r5, lr}
+	lsrs	r2, r2, #16		/* r2 = sign moved to bit 15.  */
+	lsrs	r5, r3, #24		/* r5 = biased exponent.  */
+	lsrs	r4, r0, #9		/* r4 = 23-bit mantissa.  */
+	cmp	r5, #255
+	beq	6f			/* Exponent 255: Inf or NaN.  */
+	cmp	r5, #0
+	bne	2f
+	movs	r0, r2
+	cmp	r4, #0
+	bne	2f			/* Fall through for zero: return the sign only.  */
+1:
+	pop	{r4, r5}		/* Common exit: the result is in r0.  */
+	pop	{r1}
+	bx	r1
+2:
+	movs	r1, r5
+	movs	r3, #128
+	subs	r1, r1, #127		/* r1 = unbiased exponent.  */
+	movs	r0, r1
+	lsls	r3, r3, #16
+	orrs	r3, r4			/* r3 = mantissa with bit 23 set.  */
+	adds	r0, r0, #14
+	bge	7f			/* Exponent >= -14: mask = 0x1fff.  */
+	movs	r0, r1
+	adds	r0, r0, #25
+	blt	12f			/* Exponent < -25: mask = 0x00ffffff.  */
+	movs	r4, r5
+	ldr	r0, 15f
+	subs	r4, r4, #102
+	lsrs	r0, r0, r4		/* mask = 0x00ffffff >> (exponent + 25).  */
+	movs	r4, r0
+	ands	r4, r3			/* r4 = mantissa bits covered by the mask.  */
+	bne	9f
+3:
+	cmp	r1, #15
+	bgt	11f			/* Exponent > 15: return sign | 0x7c00.  */
+4:
+	movs	r4, r1
+	movs	r0, r2
+	adds	r4, r4, #24
+	blt	1b			/* Exponent < -24: return the sign only.  */
+	movs	r0, r1
+	adds	r0, r0, #14
+	blt	14f			/* Exponent < -14: shift the mantissa instead.  */
+	adds	r1, r1, #14
+	lsls	r0, r1, #26
+	lsrs	r0, r0, #16		/* r0 = (exponent + 14) << 10, limited to 16 bits.  */
+5:
+	lsrs	r3, r3, #13
+	adds	r0, r0, r3		/* Add mantissa >> 13 to the exponent field.  */
+	orrs	r0, r2			/* Merge with the sign.  */
+	lsls	r0, r0, #16
+	lsrs	r0, r0, #16		/* Keep only the low 16 bits.  */
+	b	1b
+6:
+	movs	r3, #248
+	lsls	r3, r3, #7		/* r3 = 0x7c00.  */
+	orrs	r2, r3			/* r2 = sign | 0x7c00.  */
+	lsrs	r0, r0, #22		/* r0 = mantissa >> 13.  */
+	orrs	r0, r2
+	cmp	r4, #0
+	beq	1b			/* Mantissa 0 is Inf: return sign | 0x7c00.  */
+	movs	r3, #128
+	lsls	r3, r3, #2		/* r3 = 0x0200.  */
+	orrs	r0, r3			/* NaN: sign | 0x7e00 | (mantissa >> 13).  */
+	b	1b
+7:
+	ldr	r0, 15f+4		/* r0 = 0x1fff.  */
+8:
+	movs	r4, r0
+	ands	r4, r3			/* r4 = mantissa bits covered by the mask.  */
+	beq	3b			/* None set: no rounding needed.  */
+9:
+	adds	r0, r0, #1
+	lsrs	r0, r0, #1		/* r0 = (mask + 1) >> 1.  */
+	cmp	r4, r0
+	beq	13f			/* Exact tie.  */
+10:
+	adds	r3, r3, r0		/* Apply the rounding increment.  */
+	ldr	r0, 15f
+	cmp	r3, r0
+	bls	3b			/* Mantissa still fits in 24 bits.  */
+	movs	r1, r5
+	subs	r1, r1, #126		/* r1 = exponent + 1.  */
+	lsrs	r3, r3, #1
+	cmp	r1, #15
+	ble	4b
+11:
+	movs	r3, #248
+	lsls	r3, r3, #7		/* r3 = 0x7c00.  */
+	orrs	r3, r2
+	movs	r0, r3			/* Return sign | 0x7c00.  */
+	b	1b
+12:
+	ldr	r0, 15f			/* r0 = 0x00ffffff.  */
+	b	8b
+13:
+	lsls	r0, r0, #1
+	ands	r0, r3			/* Tie: increment only if the next bit up is set.  */
+	b	10b
+14:
+	movs	r0, #14
+	rsbs	r0, r0, #0		/* r0 = -14.  */
+	subs	r1, r0, r1		/* r1 = -14 - exponent.  */
+	lsrs	r3, r3, r1
+	movs	r0, #0			/* Exponent field is 0.  */
+	b	5b
+
+	.align	2
+15:
+	.word	16777215
+	.word	8191
+
+	FUNC_END aeabi_f2h
+	FUNC_END truncsfhf2
+
+ARM_FUNC_START aeabi_f2h_alt	/* In: r0 = float bits.  Out: r0 = 16-bit result of the _alt variant.  */
+	lsrs	r3, r0, #31
+	lsls	r3, r3, #31		/* r3 = sign bit (bit 31).  */
+	lsls	r2, r0, #1
+	push	{r4, r5, lr}
+	lsrs	r1, r3, #16		/* r1 = sign moved to bit 15.  */
+	lsrs	r2, r2, #24		/* r2 = biased exponent.  */
+	cmp	r2, #255
+	beq	6f			/* Exponent 255: return the sign only.  */
+	lsls	r3, r0, #9
+	lsrs	r3, r3, #9		/* r3 = 23-bit mantissa.  */
+	cmp	r2, #0
+	bne	2f
+	movs	r0, r1
+	cmp	r3, #0
+	bne	2f			/* Fall through for zero: return the sign only.  */
+1:
+	pop	{r4, r5}		/* Common exit: the result is in r0.  */
+	pop	{r1}
+	bx	r1
+2:
+	movs	r4, r2
+	movs	r0, #128
+	subs	r4, r4, #127		/* r4 = unbiased exponent.  */
+	lsls	r0, r0, #16
+	orrs	r3, r0			/* Set bit 23 above the 23-bit mantissa.  */
+	movs	r0, r4
+	adds	r0, r0, #14
+	bge	7f			/* Exponent >= -14: mask = 0x1fff.  */
+	movs	r0, r4
+	adds	r0, r0, #25
+	blt	12f			/* Exponent < -25: mask = 0x00ffffff.  */
+	movs	r5, r2
+	ldr	r0, 15f
+	subs	r5, r5, #102
+	lsrs	r0, r0, r5		/* mask = 0x00ffffff >> (exponent + 25).  */
+	movs	r5, r0
+	ands	r5, r3			/* r5 = mantissa bits covered by the mask.  */
+	bne	9f
+3:
+	cmp	r4, #16
+	bgt	11f			/* Exponent > 16: return sign | 0x7fff.  */
+4:
+	movs	r2, r4
+	movs	r0, r1
+	adds	r2, r2, #24
+	blt	1b			/* Exponent < -24: return the sign only.  */
+	movs	r2, r4
+	adds	r2, r2, #14
+	blt	14f			/* Exponent < -14: shift the mantissa instead.  */
+	movs	r0, r4
+	adds	r0, r0, #14
+	lsls	r0, r0, #26
+	lsrs	r0, r0, #16		/* r0 = (exponent + 14) << 10, limited to 16 bits.  */
+5:
+	lsrs	r2, r3, #13
+	adds	r0, r0, r2		/* Add mantissa >> 13 to the exponent field.  */
+	orrs	r0, r1			/* Merge with the sign.  */
+	lsls	r0, r0, #16
+	lsrs	r0, r0, #16		/* Keep only the low 16 bits.  */
+	b	1b
+6:
+	movs	r0, r1
+	b	1b
+7:
+	ldr	r0, 15f+4		/* r0 = 0x1fff.  */
+8:
+	movs	r5, r0
+	ands	r5, r3			/* r5 = mantissa bits covered by the mask.  */
+	beq	3b			/* None set: no rounding needed.  */
+9:
+	adds	r0, r0, #1
+	lsrs	r0, r0, #1		/* r0 = (mask + 1) >> 1.  */
+	cmp	r5, r0
+	beq	13f			/* Exact tie.  */
+10:
+	adds	r3, r3, r0		/* Apply the rounding increment.  */
+	ldr	r0, 15f
+	cmp	r3, r0
+	bls	3b			/* Mantissa still fits in 24 bits.  */
+	subs	r2, r2, #126
+	lsrs	r3, r3, #1
+	subs	r4, r2, #0		/* r4 = exponent + 1.  */
+	cmp	r4, #16
+	ble	4b
+11:
+	ldr	r0, 15f+8		/* r0 = 0x7fff.  */
+	orrs	r0, r1			/* Return sign | 0x7fff.  */
+	b	1b
+12:
+	ldr	r0, 15f			/* r0 = 0x00ffffff.  */
+	b	8b
+13:
+	lsls	r0, r0, #1
+	ands	r0, r3			/* Tie: increment only if the next bit up is set.  */
+	b	10b
+14:
+	movs	r2, #14
+	rsbs	r2, r2, #0		/* r2 = -14.  */
+	subs	r4, r2, r4		/* r4 = -14 - exponent.  */
+	lsrs	r3, r3, r4
+	movs	r0, #0			/* Exponent field is 0.  */
+	b	5b
+
+	.align	2
+15:
+	.word	16777215		/* 0x00ffffff.  */
+	.word	8191			/* 0x1fff.  */
+	.word	32767			/* 0x7fff.  */
+
+	FUNC_END aeabi_f2h_alt
+
+#endif /* L_arm_truncsfhf2 */
+
+#ifdef L_arm_extendhfsf2
+
+ARM_FUNC_START extendhfsf2	/* In: r0 = half bits.  Out: r0 = float bits.  */
+ARM_FUNC_ALIAS aeabi_h2f extendhfsf2
+	push	{r4, r5, r6, lr}	/* r4/r5 and lr must survive the call below.  */
+	lsls	r3, r0, #17
+	lsrs	r4, r0, #15
+	lsls	r0, r0, #22
+	lsls	r4, r4, #31		/* r4 = sign (input bit 15) moved to bit 31.  */
+	lsrs	r3, r3, #27		/* r3 = 5-bit exponent, input bits 14:10.  */
+	lsrs	r5, r0, #22		/* r5 = 10-bit mantissa, input bits 9:0.  */
+	cmp	r3, #31
+	beq	4f			/* Exponent 31: Inf or NaN.  */
+	cmp	r3, #0
+	beq	3f			/* Exponent 0: zero or subnormal.  */
+1:
+	movs	r0, r4
+	adds	r3, r3, #112		/* Rebias the exponent by 127 - 15 = 112.  */
+	lsls	r3, r3, #23
+	lsls	r5, r5, #13		/* Move the mantissa up by 13 bits.  */
+	adds	r3, r3, r5
+	orrs	r0, r3			/* Merge with the sign.  */
+2:
+	pop	{r4, r5, r6}
+	pop	{r1}
+	bx	r1
+3:
+	movs	r0, r4
+	cmp	r5, #0
+	beq	2b			/* Zero: return the sign only.  */
+	movs	r0, r5
+	bl	__clzsi2		/* libgcc helper; presumably r0 = leading-zero count of r0.  */
+	movs	r3, r0
+	subs	r3, r3, #21		/* With that, the shift that puts the top set bit at bit 10.  */
+	lsls	r5, r5, r3
+	rsbs	r3, r3, #0		/* Exponent = -shift.  */
+	b	1b
+4:
+	movs	r3, #255
+	lsls	r0, r5, #13
+	orrs	r4, r0			/* r4 = sign | (mantissa << 13).  */
+	lsls	r3, r3, #23		/* r3 = 0x7f800000: exponent field all ones.  */
+	orrs	r3, r4
+	movs	r0, r3
+	b	2b
+
+	FUNC_END aeabi_h2f
+	FUNC_END extendhfsf2
+
+ARM_FUNC_START aeabi_h2f_alt	/* In: r0 = half bits (_alt variant).  Out: r0 = float bits.  */
+	push	{r4, r5, r6, lr}	/* r4/r5 and lr must survive the call below.  */
+	lsls	r3, r0, #17
+	lsrs	r4, r0, #15
+	lsls	r0, r0, #22
+	lsls	r4, r4, #31		/* r4 = sign (input bit 15) moved to bit 31.  */
+	lsrs	r3, r3, #27		/* r3 = 5-bit exponent, input bits 14:10.  */
+	lsrs	r5, r0, #22		/* r5 = 10-bit mantissa, input bits 9:0.  */
+	cmp	r3, #0
+	bne	1f			/* Any nonzero exponent, 31 included, is converted directly.  */
+	movs	r0, r4
+	cmp	r5, #0
+	beq	2f			/* Zero: return the sign only.  */
+	movs	r0, r5
+	bl	__clzsi2		/* libgcc helper; presumably r0 = leading-zero count of r0.  */
+	movs	r3, r0
+	subs	r3, r3, #21		/* With that, the shift that puts the top set bit at bit 10.  */
+	lsls	r5, r5, r3
+	rsbs	r3, r3, #0		/* Exponent = -shift.  */
+1:
+	movs	r0, r4
+	adds	r3, r3, #112		/* Rebias the exponent by 127 - 15 = 112.  */
+	lsls	r3, r3, #23
+	lsls	r5, r5, #13		/* Move the mantissa up by 13 bits.  */
+	adds	r3, r3, r5
+	orrs	r0, r3			/* Merge with the sign.  */
+2:
+	pop	{r4, r5, r6}
+	pop	{r1}
+	bx	r1
+
+	FUNC_END aeabi_h2f_alt
+
+#endif /* L_arm_extendhfsf2 */
diff --git a/libgcc/config/arm/aeabi_fp16-thumb2.S b/libgcc/config/arm/aeabi_fp16-thumb2.S
new file mode 100644
index 0000000..12fe90c
--- /dev/null
+++ b/libgcc/config/arm/aeabi_fp16-thumb2.S
@@ -0,0 +1,235 @@
+/* Half-precision floating point conversion routines for ARM.
+
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Half-precision conversion routines for thumb2 instruction set.  */
+
+#ifdef L_arm_truncsfhf2
+
+ARM_FUNC_START truncsfhf2	/* In: r0 = float bits.  Out: r0 = IEEE half bits.  */
+ARM_FUNC_ALIAS aeabi_f2h truncsfhf2
+	lsrs	r3, r0, #16
+	and	r3, r3, #32768
+	push	{r4, r5}
+	ubfx	r4, r0, #23, #8		/* r4 = biased exponent, bits 30:23.  */
+	uxth	r2, r3			/* r2 = sign moved to bit 15.  */
+	cmp	r4, #255
+	ubfx	r3, r0, #0, #23		/* r3 = 23-bit mantissa, bits 22:0.  */
+	beq	5f			/* Exponent 255: Inf or NaN.  */
+	cmp	r4, #0
+	it	eq
+	cmpeq	r3, #0
+	beq	4f			/* Zero input: return the sign only.  */
+	sub	r1, r4, #127		/* r1 = unbiased exponent.  */
+	cmn	r1, #14
+	orr	r3, r3, #8388608	/* Set bit 23 above the 23-bit mantissa.  */
+	it	ge
+	movwge	r0, #8191		/* Exponent >= -14: mask = 0x1fff.  */
+	bge	1f
+	cmn	r1, #25
+	ittte	ge
+	subge	r5, r4, #102		/* r5 = exponent + 25.  */
+	mvnge	r0, #-16777216
+	lsrge	r0, r0, r5		/* mask = 0x00ffffff >> (exponent + 25).  */
+	mvnlt	r0, #-16777216		/* Exponent < -25: mask = 0x00ffffff.  */
+1:
+	ands	r5, r3, r0		/* r5 = mantissa bits covered by the mask.  */
+	beq	2f			/* None set: no rounding needed.  */
+	adds	r0, r0, #1
+	lsrs	r0, r0, #1		/* r0 = (mask + 1) >> 1.  */
+	cmp	r5, r0
+	it	eq
+	andeq	r0, r3, r0, lsl #1	/* Exact tie: increment only if the next bit up is set.  */
+	add	r3, r3, r0		/* Apply the rounding increment.  */
+	cmp	r3, #16777216
+	itt	cs
+	lsrcs	r3, r3, #1		/* Mantissa reached bit 24: shift it back ...  */
+	subcs	r1, r4, #126		/* ... and use exponent + 1.  */
+2:
+	cmp	r1, #15
+	itt	gt
+	orrgt	r3, r2, #31744
+	uxthgt	r0, r3			/* Exponent > 15: return sign | 0x7c00.  */
+	bgt	3f
+	cmn	r1, #24
+	blt	4f			/* Exponent < -24: return the sign only.  */
+	cmn	r1, #14
+	ittet	lt
+	movwlt	r0, #65522
+	movtlt	r0, 65535		/* r0 = 0xfffffff2 = -14.  */
+	addge	r0, r1, #14
+	sublt	r0, r0, r1		/* r0 = -14 - exponent.  */
+	itete	ge
+	lslge	r0, r0, #10
+	lsrlt	r3, r3, r0		/* Shift the mantissa right by -14 - exponent.  */
+	uxthge	r0, r0			/* r0 = (exponent + 14) << 10, limited to 16 bits.  */
+	movlt	r0, #0			/* Exponent < -14: exponent field is 0.  */
+	add	r3, r0, r3, lsr #13	/* Add mantissa >> 13 to the exponent field.  */
+	orrs	r3, r3, r2		/* Merge with the sign.  */
+	uxth	r0, r3			/* Keep only the low 16 bits.  */
+3:
+	pop	{r4, r5}
+	bx	lr
+4:
+	mov	r0, r2
+	pop	{r4, r5}
+	bx	lr
+5:
+	cmp	r3, #0			/* Mantissa 0 means Inf, anything else is a NaN.  */
+	it	ne
+	orrne	r3, r3, #4194304	/* NaN: force bit 22, which lands on 0x0200 below.  */
+	orr	r0, r2, r3, lsr #13	/* sign | (mantissa >> 13).  */
+	orr	r0, r0, #31744		/* Inf: sign | 0x7c00.  NaN: sign | 0x7e00 | payload.  */
+	pop	{r4, r5}
+	bx	lr
+	FUNC_END aeabi_f2h
+	FUNC_END truncsfhf2
+
+ARM_FUNC_START aeabi_f2h_alt	/* In: r0 = float bits.  Out: r0 = 16-bit result of the _alt variant.  */
+	push	{r4, r5}
+	lsrs	r3, r0, #16
+	ubfx	r4, r0, #23, #8		/* r4 = biased exponent, bits 30:23.  */
+	and	r3, r3, #32768
+	cmp	r4, #255
+	uxth	r2, r3			/* r2 = sign moved to bit 15.  */
+	beq	3f			/* Exponent 255: return the sign only.  */
+	ubfx	r3, r0, #0, #23		/* r3 = 23-bit mantissa, bits 22:0.  */
+	cmp	r4, #0
+	it	eq
+	cmpeq	r3, #0
+	beq	3f			/* Zero input: return the sign only.  */
+	sub	r1, r4, #127		/* r1 = unbiased exponent.  */
+	cmn	r1, #14
+	orr	r3, r3, #8388608	/* Set bit 23 above the 23-bit mantissa.  */
+	it	ge
+	movwge	r0, #8191		/* Exponent >= -14: mask = 0x1fff.  */
+	bge	1f
+	cmn	r1, #25
+	ittte	ge
+	subge	r5, r4, #102		/* r5 = exponent + 25.  */
+	mvnge	r0, #-16777216
+	lsrge	r0, r0, r5		/* mask = 0x00ffffff >> (exponent + 25).  */
+	mvnlt	r0, #-16777216		/* Exponent < -25: mask = 0x00ffffff.  */
+1:
+	ands	r5, r3, r0		/* r5 = mantissa bits covered by the mask.  */
+	beq	2f			/* None set: no rounding needed.  */
+	adds	r0, r0, #1
+	lsrs	r0, r0, #1		/* r0 = (mask + 1) >> 1.  */
+	cmp	r5, r0
+	it	eq
+	andeq	r0, r3, r0, lsl #1	/* Exact tie: increment only if the next bit up is set.  */
+	add	r3, r3, r0		/* Apply the rounding increment.  */
+	cmp	r3, #16777216
+	itt	cs
+	lsrcs	r3, r3, #1		/* Mantissa reached bit 24: shift it back ...  */
+	subcs	r1, r4, #126		/* ... and use exponent + 1.  */
+2:
+	cmp	r1, #16
+	bgt	4f			/* Exponent > 16: return sign | 0x7fff.  */
+	cmn	r1, #24
+	blt	3f			/* Exponent < -24: return the sign only.  */
+	pop	{r4, r5}		/* r4/r5 are not used again on this path.  */
+	cmn	r1, #14
+	ittet	lt
+	movwlt	r0, #65522
+	movtlt	r0, 65535		/* r0 = 0xfffffff2 = -14.  */
+	addge	r0, r1, #14
+	sublt	r0, r0, r1		/* r0 = -14 - exponent.  */
+	itete	ge
+	lslge	r0, r0, #10
+	lsrlt	r3, r3, r0		/* Shift the mantissa right by -14 - exponent.  */
+	uxthge	r0, r0			/* r0 = (exponent + 14) << 10, limited to 16 bits.  */
+	movlt	r0, #0			/* Exponent < -14: exponent field is 0.  */
+	add	r0, r0, r3, lsr #13	/* Add mantissa >> 13 to the exponent field.  */
+	orrs	r0, r0, r2		/* Merge with the sign.  */
+	uxth	r0, r0			/* Keep only the low 16 bits.  */
+	bx	lr
+3:
+	mov	r0, r2
+	pop	{r4, r5}
+	bx	lr
+4:
+	mvn	r0, r2, lsr #15		/* r0 = ~(sign >> 15).  */
+	pop	{r4, r5}
+	mvn	r0, r0, lsl #15		/* r0 = sign | 0x7fff.  */
+	uxth	r0, r0
+	bx	lr
+	FUNC_END aeabi_f2h_alt
+
+#endif /* L_arm_truncsfhf2 */
+
+#ifdef L_arm_extendhfsf2
+
+ARM_FUNC_START extendhfsf2	/* In: r0 = half bits.  Out: r0 = float bits.  Uses r0, r2, r3 only.  */
+ARM_FUNC_ALIAS aeabi_h2f extendhfsf2
+	ubfx	r3, r0, #10, #5		/* r3 = 5-bit exponent, bits 14:10.  */
+	and	r2, r0, #32768
+	cmp	r3, #31
+	lsl	r2, r2, #16		/* r2 = sign moved to bit 31.  */
+	ubfx	r0, r0, #0, #10		/* r0 = 10-bit mantissa, bits 9:0.  */
+	beq	2f			/* Exponent 31: Inf or NaN.  */
+	cbnz	r3, 1f			/* Nonzero exponent: convert directly.  */
+	cbz	r0, 3f			/* Zero: return the sign only.  */
+	clz	r3, r0
+	subs	r3, r3, #21		/* Shift that puts the top set bit at bit 10.  */
+	lsls	r0, r0, r3
+	negs	r3, r3			/* Exponent = -shift.  */
+1:
+	lsls	r0, r0, #13		/* Move the mantissa up by 13 bits.  */
+	adds	r3, r3, #112		/* Rebias the exponent by 127 - 15 = 112.  */
+	add	r3, r0, r3, lsl #23	/* Add the exponent field at bit 23.  */
+	orr	r0, r2, r3		/* Merge with the sign.  */
+	bx	lr
+2:
+	orr	r0, r2, r0, lsl #13	/* sign | (mantissa << 13).  */
+	orr	r0, r0, #2139095040	/* | 0x7f800000: exponent field all ones.  */
+	bx	lr
+3:
+	mov	r0, r2
+	bx	lr
+	FUNC_END aeabi_h2f
+	FUNC_END extendhfsf2
+
+ARM_FUNC_START aeabi_h2f_alt	/* In: r0 = half bits (_alt variant).  Out: r0 = float bits.  */
+	and	r3, r0, #32768
+	ubfx	r2, r0, #10, #5		/* r2 = 5-bit exponent, bits 14:10.  */
+	lsls	r1, r3, #16		/* r1 = sign moved to bit 31.  */
+	ubfx	r0, r0, #0, #10		/* r0 = 10-bit mantissa, bits 9:0.  */
+	cbnz	r2, 1f			/* Any nonzero exponent, 31 included, is converted directly.  */
+	cbz	r0, 2f			/* Zero: return the sign only.  */
+	clz	r2, r0
+	subs	r2, r2, #21		/* Shift that puts the top set bit at bit 10.  */
+	lsls	r0, r0, r2
+	negs	r2, r2			/* Exponent = -shift.  */
+1:
+	adds	r2, r2, #112		/* Rebias the exponent by 127 - 15 = 112.  */
+	lsls	r0, r0, #13		/* Move the mantissa up by 13 bits.  */
+	add	r3, r0, r2, lsl #23	/* Add the exponent field at bit 23.  */
+	orr	r0, r1, r3		/* Merge with the sign.  */
+	bx	lr
+2:
+	mov	r0, r1
+	bx	lr
+	FUNC_END aeabi_h2f_alt
+
+#endif /* L_arm_extendhfsf2 */
diff --git a/libgcc/config/arm/fp16.c b/libgcc/config/arm/fp16.c
index 86a6348..a8da79c 100644
--- a/libgcc/config/arm/fp16.c
+++ b/libgcc/config/arm/fp16.c
@@ -141,3 +141,28 @@ __gnu_h2f_alternative(unsigned short a)
 {
   return __gnu_h2f_internal(a, 0);
 }
+
+/* According to the run-time ABI for the ARM Architecture, the RTABI half
+   precision floating-point conversion functions are allowed to corrupt
+   only the integer core registers permitted to be corrupted by the [AAPCS]
+   (r0-r3, ip, lr, and CPSR).
+
+   Therefore, we can't simply use aliases to support these functions
+   on targets with FP registers.  Instead, versions for these specific
+   targets are written in assembler (in aeabi_fp16-arm/thumb/thumb2.S).  */
+
+#if defined (__SOFTFP__)
+
+/* __SOFTFP__ is defined, which means no FP registers will be used in the
+   routines; this allows using aliases to make the __gnu and __aeabi
+   functions the same.  */
+
+#define ALIAS(src, dst) \
+  typeof (src) dst __attribute__ ((alias (#src)));
+
+ALIAS (__gnu_f2h_ieee, __aeabi_f2h)
+ALIAS (__gnu_f2h_alternative, __aeabi_f2h_alt)
+ALIAS (__gnu_h2f_ieee, __aeabi_h2f)
+ALIAS (__gnu_h2f_alternative, __aeabi_h2f_alt)
+
+#endif /* __SOFTFP__ */
diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S
index a02238a..ec9fff7 100644
--- a/libgcc/config/arm/lib1funcs.S
+++ b/libgcc/config/arm/lib1funcs.S
@@ -1974,3 +1974,15 @@ LSYM(Lchange_\register):
 #include "bpabi-v6m.S"
 #endif /* __ARM_ARCH_6M__ */
 #endif /* !__symbian__ */
+
+#if !defined (__SOFTFP__)
+/* Include the half-precision conversion routines for the current instruction set.  */
+# if defined (__thumb2__)
+#  include "aeabi_fp16-thumb2.S"
+# elif defined (__thumb__)
+#  include "aeabi_fp16-thumb.S"
+# else
+#  include "aeabi_fp16-arm.S"
+# endif
+
+#endif /* !__SOFTFP__ */
diff --git a/libgcc/config/arm/libgcc-bpabi.ver b/libgcc/config/arm/libgcc-bpabi.ver
index a961302..1f4c54d 100644
--- a/libgcc/config/arm/libgcc-bpabi.ver
+++ b/libgcc/config/arm/libgcc-bpabi.ver
@@ -106,3 +106,9 @@ GCC_3.5 {
 GCC_4.3.0 {
   _Unwind_Backtrace
 }
+GCC_5.0.0 {
+  __aeabi_f2h
+  __aeabi_f2h_alt
+  __aeabi_h2f
+  __aeabi_h2f_alt
+}
diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf
index 9e7a317..1125484 100644
--- a/libgcc/config/arm/t-elf
+++ b/libgcc/config/arm/t-elf
@@ -6,7 +6,7 @@ LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \
 	_call_via_rX _interwork_call_via_rX \
 	_lshrdi3 _ashrdi3 _ashldi3 \
 	_arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \
-	_arm_fixdfsi _arm_fixunsdfsi \
+	_arm_fixdfsi _arm_fixunsdfsi _arm_truncsfhf2 _arm_extendhfsf2\
 	_arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
 	_arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
 	_arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
diff --git a/libgcc/config/arm/t-symbian b/libgcc/config/arm/t-symbian
index d573157..35cee87 100644
--- a/libgcc/config/arm/t-symbian
+++ b/libgcc/config/arm/t-symbian
@@ -1,6 +1,6 @@
 LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 _ctzsi2
 
-# These functions have __aeabi equivalents and will never be called by GCC.

+# These functions have __aeabi equivalents and will never be called by GCC.
 # By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being
 # used -- and we make sure that definitions are not available in lib1funcs.S,
 # either, so they end up undefined.
@@ -10,7 +10,7 @@ LIB1ASMFUNCS += \
 	_fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \
 	_negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \
 	_truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \
-	_fixsfsi _fixunssfsi
+	_fixsfsi _fixunssfsi _truncsfhf2 _extendhfsf2
 
 # Include half-float helpers.
 LIB2ADD_ST += $(srcdir)/config/arm/fp16.c
Attachment: aeabi_fp16-conv.patch-3
Description: Binary data
Follow-Ups:
- Re: [PATCH] [1/2] [ARM] [libgcc] Support RTABI half-precision conversion functions.
  - From: Joseph Myers
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]