This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Better sequence for fp->unsigned truncation
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org, rth at redhat dot com
- Date: Wed, 19 Feb 2003 13:14:18 +0100
- Subject: Better sequence for fp->unsigned truncation
Hi,
the following testcase
unsigned long a;
float b;
t ()
{
a = b;
}
Currently expands on x86-64 into
t:
.LFB3:
flds b(%rip)
flds .LC0(%rip)
fxch %st(1)
fucomi %st(1), %st
jae .L2
fstp %st(1)
fnstcw -2(%rsp)
movzwl -2(%rsp), %eax
orb $12, %ah
movw %ax, -4(%rsp)
fldcw -4(%rsp)
fistpll -16(%rsp)
fldcw -2(%rsp)
movq -16(%rsp), %rdx
movq %rdx, a(%rip)
ret
.p2align 4,,7
.L2:
fnstcw -2(%rsp)
fsubp %st, %st(1)
movzwl -2(%rsp), %eax
orb $12, %ah
movw %ax, -4(%rsp)
movabsq $-9223372036854775808, %rax
fldcw -4(%rsp)
fistpll -16(%rsp)
fldcw -2(%rsp)
movq -16(%rsp), %rdx
xorq %rax, %rdx
movq %rdx, a(%rip)
ret
While I believe that the following code is equivalent:
t:
.LFB3:
movss b(%rip), %xmm0
movss .LC0(%rip), %xmm1
ucomiss %xmm1, %xmm0
jae .L2
cvttss2siq %xmm0, %rdx
movq %rdx, a(%rip)
ret
.p2align 4,,7
.L2:
subss %xmm1, %xmm0
movabsq $-9223372036854775808, %rax
cvttss2siq %xmm0, %rdx
xorq %rax, %rdx
movq %rdx, a(%rip)
ret
The same sequence is already used by the MIPS backend. I've added an explanatory
comment to the patch explaining why I think this is valid.
Honza
Wed Feb 19 13:09:51 CET 2003 Jan Hubicka <jh at suse dot cz>
* optabs.c (expand_fix): Do not widen the input operand.
Index: optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.150.2.6
diff -c -3 -p -r1.150.2.6 optabs.c
*** optabs.c 16 Feb 2003 18:50:06 -0000 1.150.2.6
--- optabs.c 19 Feb 2003 12:09:37 -0000
*************** expand_fix (to, from, unsignedp)
*** 4787,4801 ****
one plus the highest signed number, convert, and add it back.
We only need to check all real modes, since we know we didn't find
! anything with a wider integer mode. */
if (unsignedp && GET_MODE_BITSIZE (GET_MODE (to)) <= HOST_BITS_PER_WIDE_INT)
for (fmode = GET_MODE (from); fmode != VOIDmode;
fmode = GET_MODE_WIDER_MODE (fmode))
! /* Make sure we won't lose significant bits doing this. */
! if (GET_MODE_BITSIZE (fmode) > GET_MODE_BITSIZE (GET_MODE (to))
! && CODE_FOR_nothing != can_fix_p (GET_MODE (to), fmode, 0,
! &must_trunc))
{
int bitsize;
REAL_VALUE_TYPE offset;
--- 4787,4812 ----
one plus the highest signed number, convert, and add it back.
We only need to check all real modes, since we know we didn't find
! anything with a wider integer mode.
!
! This code used to extend the FP value into a mode wider than the
! destination. This is not needed. Consider, for instance, conversion
! from SFmode into DImode.
!
! The hot path through the code deals with inputs smaller than 2^63
! and does just the conversion, so there are no bits to lose.
!
! In the other path we know the value is positive and in the range
! 2^63..2^64-1 inclusive (for any other input, overflow happens and the
! result is undefined). So we know that the most significant bit set in
! the mantissa corresponds to 2^63. The subtraction of 2^63 should not
! generate any rounding as it simply clears out that bit. The rest is trivial. */
if (unsignedp && GET_MODE_BITSIZE (GET_MODE (to)) <= HOST_BITS_PER_WIDE_INT)
for (fmode = GET_MODE (from); fmode != VOIDmode;
fmode = GET_MODE_WIDER_MODE (fmode))
! if (CODE_FOR_nothing != can_fix_p (GET_MODE (to), fmode, 0,
! &must_trunc))
{
int bitsize;
REAL_VALUE_TYPE offset;