Index: gcc/config/sh/lib1funcs.asm =================================================================== RCS file: /home/gnu/cvs/gcc-3.4/gcc/gcc/config/sh/lib1funcs.asm,v retrieving revision 3.4.1.1 retrieving revision 3.4.1.1.2.1 diff -u -p -r3.4.1.1 -r3.4.1.1.2.1 --- gcc/config/sh/lib1funcs.asm 2004/05/13 06:13:30 3.4.1.1 +++ gcc/config/sh/lib1funcs.asm 2004/07/23 10:14:03 3.4.1.1.2.1 @@ -2873,3 +2873,173 @@ GLOBAL(GCC_pop_shmedia_regs_nofpu): ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) #endif /* __SH5__ == 32 */ #endif /* L_push_pop_shmedia_regs */ + +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + #define SHLL4(REG) \ + shll2 REG; \ + shll2 REG + + #define SHLR4(REG) \ + shlr2 REG; \ + shlr2 REG + + #define SHLL6(REG) \ + shll2 REG; \ + shll2 REG; \ + shll2 REG + + #define SHLR6(REG) \ + shlr2 REG; \ + shlr2 REG; \ + shlr2 REG + + #define SHLL12(REG) \ + shll8 REG; \ + SHLL4 (REG) + + #define SHLR19(REG) \ + shlr16 REG; \ + shlr2 REG; \ + shlr REG + + #define SHLL23(REG) \ + shll16 REG; \ + SHLL6 (REG); \ + shll REG + + #define SHLR24(REG) \ + shlr16 REG; \ + shlr8 REG + + #define SHLR21(REG) \ + shlr16 REG; \ + SHLR4 (REG); \ + shlr REG + + #define SHLL21(REG) \ + shll16 REG; \ + SHLL4 (REG); \ + shll REG + + #define SHLR11(REG) \ + shlr8 REG; \ + shlr2 REG; \ + shlr REG + + #define SHLR22(REG) \ + shlr16 REG; \ + SHLR6 (REG) + + #define SHLR23(REG) \ + SHLR22 (REG); \ + shlr REG + + #define SHLR31(REG) \ + shlr16 REG; \ + shlr8 REG; \ + SHLR6 (REG); \ + shlr REG + + #define SHLL31(REG) \ + shll16 REG; \ + shll8 REG; \ + SHLL6 (REG); \ + shll REG + + #define SHLR20(REG) \ + shlr16 REG; \ + SHLR4 (REG) + + #define SHLL20(REG) \ + shll16 REG; \ + SHLL4 (REG) +#endif + +#if defined(L_addsub_sf) +#include "IEEE-754/SF/addsf3.s" +#endif + +#if defined(L_compare_sf) || defined(L_eq_sf) || defined(L_ne_sf) || defined(L_gt_sf) || defined(L_ge_sf) || defined(L_lt_sf) || defined(L_le_sf) +#include "IEEE-754/SF/compsf2.s" +#endif + +#if defined(L_div_sf) 
+#include "IEEE-754/SF/divsf3.s" +#endif + +#if defined(L_sf_to_si) +#include "IEEE-754/SF/fixsfsi.s" +#endif + +#if defined(L_sf_to_usi) +#include "IEEE-754/SF/fixunssfsi.s" +#endif + +#if defined(L_si_to_sf) +#include "IEEE-754/SF/floatsisf.s" +#endif + +#if defined(L_usi_to_sf) +#include "IEEE-754/SF/floatunssisf.s" +#endif + +#if defined(L_mul_sf) +#include "IEEE-754/SF/mulsf3.s" +#endif + +#if defined(L_negate_sf) +#include "IEEE-754/SF/negsf2.s" +#endif + +#if defined(L_unord_sf) +#include "IEEE-754/SF/unordsf2.s" +#endif + +/* double starts here. */ +#if defined(L_sf_to_df) +#include "IEEE-754/DF/extendsfdf2.s" +#endif + +#if defined(L_df_to_sf) +#include "IEEE-754/DF/truncdfsf2.s" +#endif + +#if defined(L_addsub_df) +#include "IEEE-754/DF/adddf3.s" +#endif + +#if defined(L_df_to_si) +#include "IEEE-754/DF/fixdfsi.s" +#endif + +#if defined(L_df_to_usi) +#include "IEEE-754/DF/fixunsdfsi.s" +#endif + +#if defined(L_mul_df) +#include "IEEE-754/DF/muldf3.s" +#endif + +#if defined(L_div_df) +#include "IEEE-754/DF/divdf3.s" +#endif + +#if defined(L_unord_df) +#include "IEEE-754/DF/unorddf2.s" +#endif + +#if defined(L_negate_df) +#include "IEEE-754/DF/negdf2.s" +#endif + +#if defined(L_si_to_df) +#include "IEEE-754/DF/floatsidf.s" +#endif + +#if defined(L_usi_to_df) +#include "IEEE-754/DF/floatunssidf.s" +#endif + +#if defined(L_compare_df) || defined(L_eq_df) || defined(L_ne_df) || defined(L_gt_df) || defined(L_ge_df) || defined(L_lt_df) || defined(L_le_df) +#include "IEEE-754/DF/compdf2.s" +#endif Index: gcc/config/sh/t-sh =================================================================== RCS file: /home/gnu/cvs/gcc-3.4/gcc/gcc/config/sh/t-sh,v retrieving revision 3.4.1.1 retrieving revision 3.4.1.1.2.1 diff -u -p -r3.4.1.1 -r3.4.1.1.2.1 --- gcc/config/sh/t-sh 2004/05/13 06:13:30 3.4.1.1 +++ gcc/config/sh/t-sh 2004/07/23 10:14:03 3.4.1.1.2.1 @@ -1,25 +1,29 @@ LIB1ASMSRC = sh/lib1funcs.asm -LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \ - 
_movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
+LIB1ASMFUNCS = _addsub_sf _fpcmp_parts_sf _sf_to_si _si_to_sf _mul_sf _gt_sf _ge_sf \
+	_unord_sf _div_sf _sf_to_usi _usi_to_sf _negate_sf _sf_to_df _lt_sf _le_sf \
+	_ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr _eq_sf _ne_sf _df_to_sf \
+	_movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr _addsub_df \
+	_df_to_si _si_to_df _mul_df _gt_df _ge_df _unord_df _div_df _df_to_usi _usi_to_df _negate_df \
+	_lt_df _le_df _eq_df _ne_df _fpcmp_parts_df $(LIB1ASMFUNCS_CACHE)
 
 # We want fine grained libraries, so use the new code to build the
 # floating point emulation libraries.
-FPBIT = fp-bit.c
-DPBIT = dp-bit.c
+#FPBIT = fp-bit.c
+#DPBIT = dp-bit.c
 
-dp-bit.c: $(srcdir)/config/fp-bit.c
-	echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
-	echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c
-	echo '#endif' >> dp-bit.c
-	cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+#dp-bit.c: $(srcdir)/config/fp-bit.c
+#	echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
+#	echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c
+#	echo '#endif' >> dp-bit.c
+#	cat $(srcdir)/config/fp-bit.c >> dp-bit.c
 
-fp-bit.c: $(srcdir)/config/fp-bit.c
-	echo '#define FLOAT' > fp-bit.c
-	echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
-	echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c
-	echo '#endif' >> fp-bit.c
-	cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+#fp-bit.c: $(srcdir)/config/fp-bit.c
+#	echo '#define FLOAT' > fp-bit.c
+#	echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+#	echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c
+#	echo '#endif' >> fp-bit.c
+#	cat $(srcdir)/config/fp-bit.c >> fp-bit.c
 
 MULTILIB_ENDIAN = ml
 MULTILIB_OPTIONS= $(MULTILIB_ENDIAN) m2/m2e/m4-single-only/m4-single/m4
Index: gcc/config/sh/IEEE-754/DF/adddf3.s
===================================================================
RCS file: adddf3.s
diff -N adddf3.s
--- /dev/null	Sat Mar 24 10:07:44 2001
+++ /tmp/cvsRb1Qdn	Wed Aug 4 19:58:21 2004
@@ -0,0 +1,780 @@
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Routine for adding two double numbers + +! Author: Rakesh Kumar + +! Arguments: r4-r5, r6-r7 +! Result: r0-r1 + +! The value in r4-r5 is referred to as op1 +! and that in r6-r7 is referred to as op2 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (subdf3) + FUNC (GLOBAL (subdf3)) + .global GLOBAL (adddf3) + FUNC (GLOBAL (adddf3)) + +GLOBAL (subdf3): +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r6,r2 + + mov r5,r4 + mov r7,r6 + + mov r1,r5 + mov r2,r7 +#endif + mov.l .L_sign,r2 + bra .L_adddf3_1 + xor r2,r6 + +GLOBAL (adddf3): +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r6,r2 + + mov r5,r4 + mov r7,r6 + + mov r1,r5 + mov r2,r7 +#endif + +.L_adddf3_1: + mov.l r8,@-r15 + mov r4,r1 + + mov.l .L_inf,r2 + mov r6,r3 + + mov.l r9,@-r15 + and r2,r1 !Exponent of op1 in r1 + + mov.l r10,@-r15 + and r2,r3 !Exponent of op2 in r3 + + ! Check for Nan or Infinity + mov.l .L_sign,r9 + cmp/eq r2,r1 + + mov r9,r10 + bt .L_thread_inv_exp_op1 + + mov r9,r0 + cmp/eq r2,r3 +! op1 has a valid exponent. We need not check it again. +! Return op2 straight away. + and r4,r9 !r9 has sign bit for op1 + bt .L_ret_op2 + + ! Check for -ve zero + cmp/eq r4,r0 + and r6,r10 !r10 has sign bit for op2 + + bt .L_op1_nzero + + cmp/eq r6,r0 + bt .L_op2_nzero + +! Check for zero +.L_non_zero: + tst r4,r4 + bt .L_op1_zero + + ! op1 is not zero, check op2 for zero + tst r6,r6 + bt .L_op2_zero + +! r1 and r3 has masked out exponents, r9 and r10 has signs +.L_add: + mov.l .L_high_mant,r8 + mov #-20,r2 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r1 ! r1 now has exponent for op1 in its lower bits +#else + SHLR20 (r1) +#endif + and r8,r6 ! Higher bits of mantissa of op2 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r3 ! r3 has exponent for op2 in its lower bits +#else + SHLR20 (r3) +#endif + and r8,r4 ! 
Higher bits of mantissa of op1 + + mov.l .L_21bit,r8 + + tst r1,r1 + bt .L_norm_op1 + + ! Set the 21st bit. + or r8,r4 + tst r3,r3 + + bt .L_norm_op2 + or r8,r6 + +! Check for negative mantissas. Make them positive by negation +! r9 and r10 have signs of op1 and op2 respectively +.L_neg_mant: + tst r9,r9 + bf .L_neg_op1 + + tst r10,r10 + bf .L_neg_op2 + +.L_add_1: + cmp/ge r1,r3 + + mov r1,r0 + bt .L_op2_exp_greater + + sub r3,r0 + ! If exponent difference is greater than 54, the resultant exponent + ! won't be changed. Return op1 straight away. + mov #54,r2 + cmp/gt r2,r0 + + bt .L_pack_op1 + + mov r1,r3 + clrt + + cmp/eq #0,r0 + bt .L_add_mant + + ! Shift left the first operand and apply rest of shifts to second operand. + mov #0,r2 + shll r5 + + rotcl r4 + + add #-1,r3 + add #-1,r0 + + cmp/eq #0,r0 + bt .L_add_mant +! Shift the mantissa part of op2 so that both exponents are equal +.L_shfrac_op2: + add #-1,r0 + shar r6 + + rotcr r7 + rotcr r2 + + cmp/eq #0,r0 + bf .L_shfrac_op2 + + +! Add the psotive mantissas and check for overflow by checking the +! MSB of the resultant. In case of overflow, negate the result. +.L_add_mant: + clrt + addc r7,r5 + + mov #0,r10 ! Assume resultant to be positive + addc r6,r4 + + mov.l .L_sign,r0 + tst r4,r0 + + bt .L_mant_ptv + + negc r5,r5 + + mov r0,r10 ! The assumption was wrong, result is negative + negc r4,r4 + clrt + +! 23rd bit in the high part of mantissa could be set. +! In this case, right shift the mantissa. +.L_mant_ptv: + mov.l .L_23bit,r0 + + tst r4,r0 + bt .L_mant_ptv_0 + + shlr r4 + rotcr r5 + + add #1,r3 + bra .L_mant_ptv_1 + rotcr r2 + +.L_mant_ptv_0: + mov.l .L_22bit,r0 + tst r4,r0 + + bt .L_norm_mant + +.L_mant_ptv_1: + ! 22 bit of resultant mantissa is set. Shift right the mantissa + ! and add 1 to exponent + add #1,r3 + shlr r4 + rotcr r5 + ! The mantissa is already normalized. We don't need to + ! spend any effort. Branch to epilogue. + bra .L_epil + rotcr r2 + +! 
Normalize operands +.L_norm_op1: + shll r5 + + rotcl r4 + add #-1,r1 + + tst r4,r8 + bt .L_norm_op1 + + tst r3,r3 + bf/s .L_neg_mant + add #1,r1 + +.L_norm_op2: + shll r7 + + rotcl r6 + add #-1,r3 + + tst r6,r8 + bt .L_norm_op2 + + bra .L_neg_mant + add #1,r3 + +! Negate the mantissa of op1 +.L_neg_op1: + clrt + negc r5,r5 + + negc r4,r4 + tst r10,r10 + + bt .L_add_1 + +! Negate the mantissa of op2 +.L_neg_op2: + clrt + negc r7,r7 + + bra .L_add_1 + negc r6,r6 + +! Thread the jump to .L_inv_exp_op1 +.L_thread_inv_exp_op1: + bra .L_inv_exp_op1 + nop + +.L_ret_op2: + mov.l @r15+,r10 +#ifdef __LITTLE_ENDIAN__ + mov r6,r1 +#else + mov r6,r0 +#endif + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r7,r0 +#else + mov r7,r1 +#endif + + rts + mov.l @r15+,r8 + +.L_op1_nzero: + tst r5,r5 + bt .L_ret_op2 + + ! op1 is not zero. Check op2 for negative zero + cmp/eq r6,r0 + bf .L_non_zero ! both op1 and op2 are not -0 + +.L_op2_nzero: + tst r7,r7 + bf .L_non_zero + + mov.l @r15+,r10 +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 +#else + mov r4,r0 ! op2 is -0, return op1 +#endif + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r5,r0 +#else + mov r5,r1 +#endif + + rts + mov.l @r15+,r8 + +! High bit of op1 is known to be zero. +! Check low bit. r2 contains 0x00000000 +.L_op1_zero: + tst r5,r5 + bt .L_ret_op2 + + ! op1 is not zero. Check high bit of op2 + tst r6,r6 + bf .L_add ! both op1 and op2 are not zero + +! op1 is not zero. High bit of op2 is known to be zero. +! Check low bit of op2. r2 contains 0x00000000 +.L_op2_zero: + tst r7,r7 + bf .L_add + + mov.l @r15+,r10 +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 +#else + mov r4,r0 ! op2 is zero, return op1 +#endif + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r5,r0 +#else + mov r5,r1 +#endif + + rts + mov.l @r15+,r8 + +! exp (op1) is smaller or equal to exp (op2) +! The logic of same operations is present in .L_add. Kindly refer it for +! 
comments +.L_op2_exp_greater: + mov r3,r0 + sub r1,r0 + + mov #54,r2 + cmp/gt r2,r0 + + bt .L_pack_op2 + + cmp/eq #0,r0 + bt .L_add_mant + + mov #0,r2 + shll r7 + rotcl r6 + add #-1,r0 + add #-1,r3 + + cmp/eq #0,r0 + bt .L_add_mant +.L_shfrac_op1: + add #-1,r0 + shar r4 + + rotcr r5 + rotcr r2 + + cmp/eq #0,r0 + bf .L_shfrac_op1 + + bra .L_add_mant + nop + +! Return the value in op1 +.L_ret_op1: + mov.l @r15+,r10 +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 +#else + mov r4,r0 +#endif + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r5,r0 +#else + mov r5,r1 +#endif + + rts + mov.l @r15+,r8 + +! r1 has exp, r9 has sign, r4 and r5 mantissa +.L_pack_op1: + mov.l .L_high_mant,r7 + mov r4,r0 + + tst r9,r9 + bt .L_pack_op1_1 + + clrt + negc r5,r5 + negc r0,r0 + +.L_pack_op1_1: + and r7,r0 + mov r1,r3 + + mov #20,r2 + mov r5,r1 + + mov.l @r15+,r10 + or r9,r0 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r3 +#else + SHLL20 (r3) +#endif + mov.l @r15+,r9 + + or r3,r0 +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +!r2 has exp, r10 has sign, r6 and r7 mantissa +.L_pack_op2: + mov.l .L_high_mant,r9 + mov r6,r0 + + tst r10,r10 + bt .L_pack_op2_1 + + clrt + negc r7,r7 + negc r0,r0 + +.L_pack_op2_1: + and r9,r0 + mov r7,r1 + + mov #20,r2 + or r10,r0 + + mov.l @r15+,r10 +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r3 +#else + SHLL20 (r3) +#endif + + mov.l @r15+,r9 + + or r3,r0 +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +! Normalize the mantissa by setting its 21 bit in high part +.L_norm_mant: + mov.l .L_21bit,r0 + + tst r4,r0 + bf .L_epil + + tst r4,r4 + bf .L_shift_till_1 + + tst r5,r5 + bf .L_shift_till_1 + + ! Mantissa is zero, return 0 + mov.l @r15+,r10 + mov #0,r0 + + mov.l @r15+,r9 + mov.l @r15+,r8 + + rts + mov #0,r1 + +! A loop for making the 21st bit 1 in high part of resultant mantissa +! 
It is already ensured that 1 bit is present in the mantissa +.L_shift_till_1: + clrt + shll r5 + + rotcl r4 + add #-1,r3 + + tst r4,r0 + bt .L_shift_till_1 + +! Return the result. Mantissa is in r4-r5. Exponent is in r3 +! Sign bit in r10 +.L_epil: + cmp/pl r3 + bf .L_denorm + + clrt + mov #0,r1 + + tst r2,r2 + bt .L_epil_1_0 + + shll r2 + movt r0 + clrt + addc r0,r5 + addc r1,r4 + +.L_epil_1_0: + +! Check extra MSB here + mov.l .L_22bit,r9 + tst r9,r4 + bf .L_epil_1 + +.L_epil_0: + mov.l .L_21bit,r1 + + not r1,r1 + and r1,r4 + + mov r4,r0 + or r10,r0 + + mov.l @r15+,r10 + mov #20,r2 + + mov.l @r15+,r9 + mov r5,r1 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r3 +#else + SHLL20 (r3) +#endif + or r3,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_epil_1: + shlr r4 + add #1,r3 + bra .L_epil_0 + rotcr r5 + +.L_denorm: + add #-1,r3 +.L_denorm_1: + tst r3,r3 + bt .L_denorm_2 + + shlr r4 + rotcr r5 + + movt r1 + bra .L_denorm_1 + add #1,r3 + +.L_denorm_2: + clrt + mov #0,r2 + addc r1,r5 + + addc r2,r4 + mov r4,r0 + + or r10,r0 + mov.l @r15+,r10 + + mov r5,r1 + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +! op1 is known to be positive infinity, and op2 is Inf. The sign +! of op2 is not known. Return the appropriate value +.L_op1_pinf_op2_inf: + mov.l .L_sign,r0 + tst r6,r0 + + bt .L_ret_op2_1 + + ! op2 is negative infinity. Inf - Inf is being performed + mov.l .L_inf,r0 + mov.l @r15+,r10 + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r0,r1 +#endif + mov.l @r15+,r8 + + rts +#ifdef __LITTLE_ENDIAN__ + mov #1,r0 +#else + mov #1,r1 ! 
Any value here will return Nan +#endif + +.L_ret_op1_1: + mov.l @r15+,r10 +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 +#else + mov r4,r0 +#endif + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r5,r0 +#else + mov r5,r1 +#endif + + rts + mov.l @r15+,r8 + +.L_ret_op2_1: + mov.l @r15+,r10 +#ifdef __LITTLE_ENDIAN__ + mov r6,r1 +#else + mov r6,r0 +#endif + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov r7,r0 +#else + mov r7,r1 +#endif + + rts + mov.l @r15+,r8 + +! op1 is negative Inf. op2 is Inf. Check for sign of op2 +! r2 is Nan +.L_op1_ninf_op2_inf: + mov.l .L_sign,r0 + tst r6,r0 + + bf .L_ret_op2_1 !-Inf + -Inf + + ! b is positive infinity. -Inf + Inf is being performed + mov.l @r15+,r10 +#ifdef __LITTLE_ENDIAN__ + mov r2,r1 +#else + mov r2,r0 +#endif + + mov.l @r15+,r9 +#ifdef __LITTLE_ENDIAN__ + mov #1,r0 +#else + mov #1,r1 ! Any value here will return Nan +#endif + + rts + mov.l @r15+,r8 + +! op1 is negative infinity. Check op2 for infinity or Nan +.L_op1_ninf: + cmp/eq r2,r3 + bf .L_ret_op1_1 ! op2 is neither Nan nor Inf + + ! r2 is free now + mov.l .L_high_mant,r0 + and r6,r0 + + cmp/pl r0 + bf/s .L_op1_ninf_op2_inf ! op2 is Infinity, and op1 is -Inf + mov r7,r0 + + cmp/pl r0 + bf .L_op1_ninf_op2_inf ! op2 is Infinity, and op1 is -Inf + ! op2 is not Inf. It is Nan + bt .L_ret_op2_1 + +!r1 contains exponent for op1, r3 contains exponent for op2 +!r2 has .L_inf (+ve Inf) +!op1 has invalid exponent. Either it contains Nan or Inf +.L_inv_exp_op1: + ! Check if a is Nan + cmp/pl r5 + bt .L_ret_op1_1 + + mov.l .L_high_mant,r0 + and r4,r0 + + cmp/pl r0 + bt .L_ret_op1_1 + + ! op1 is not Nan. It is infinity. Get the sign of it + ! If op2 is Nan, return op2 + mov.l .L_sign,r0 + tst r4,r0 + + bf .L_op1_ninf + + ! op2 is +ve infinity here + cmp/eq r2,r3 + bf .L_ret_op1_1 ! op2 is neither Nan nor Inf + + ! r2 is free now + mov.l .L_high_mant,r0 + tst r6,r0 ! op2 also has invalid exponent + + bf .L_ret_op2_1 ! 
op2 is Infinity, and op1 is +Infinity + + tst r7,r7 + bt .L_op1_pinf_op2_inf ! op2 is Infinity, and op1 is +Infinity + !op2 is not infinity, It is Nan + bf .L_ret_op2_1 + + .align 2 +.L_high_mant: + .long 0x000FFFFF + +.L_21bits: + .long 0x001FFFFF + +.L_22bit: + .long 0x00200000 + +.L_23bit: + .long 0x00400000 + +.L_21bit: + .long 0x00100000 + +.L_sign: + .long 0x80000000 + +.L_inf: + .long 0x7ff00000 + +ENDFUNC (GLOBAL (subdf3)) +ENDFUNC (GLOBAL (adddf3)) Index: gcc/config/sh/IEEE-754/DF/compdf2.s =================================================================== RCS file: compdf2.s diff -N compdf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsCBJRYl Wed Aug 4 19:58:21 2004 @@ -0,0 +1,395 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!comparison of two double precision floating point numbers +!Author:Aanchal Khanna +! +!Entry: +!r4,r5:operand 1 +! +!r6,r7:operand 2 +! +!Exit: +!r0:result +! +!Notes:argument 1 is passed in regs r4 and r5 and argument 2 is passed in regs r6 +!and r7, result is returned in reg r0. operand 1 is referred as op1 and operand 2 +!as op2. +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ .text + .align 5 + .global GLOBAL (gtdf2) + FUNC (GLOBAL (gtdf2)) + +GLOBAL (gtdf2): + mov.l r8,@-r15 + bra GLOBAL (compdf2) + mov #-1,r8 + + .text + .align 5 + .global GLOBAL (gedf2) + FUNC (GLOBAL (gedf2)) + +GLOBAL (gedf2): + mov.l r8,@-r15 + bra GLOBAL (compdf2) + mov #-1,r8 + + .text + .align 5 + .global GLOBAL (ltdf2) + FUNC (GLOBAL (ltdf2)) + +GLOBAL (ltdf2): + mov.l r8,@-r15 + bra GLOBAL (compdf2) + mov #1,r8 + + .text + .align 5 + .global GLOBAL (ledf2) + FUNC (GLOBAL (ledf2)) + +GLOBAL (ledf2): + mov.l r8,@-r15 + bra GLOBAL (compdf2) + mov #1,r8 + + .text + .align 5 + .global GLOBAL (nedf2) + FUNC (GLOBAL (nedf2)) + +GLOBAL (nedf2): + mov.l r8,@-r15 + bra GLOBAL (compdf2) + mov #1,r8 + + .text + .align 5 + .global GLOBAL (eqdf2) + FUNC (GLOBAL (eqdf2)) + +GLOBAL (eqdf2): + mov.l r8,@-r15 + mov #1,r8 + +!compares two given double precision floating point numbers and returns the result in r0 as + +!op1 > op2 return 1 +!op1 == op2 return 0 +!op1 < op2 return -1 + + .text + .align 5 + .global GLOBAL (compdf2) + FUNC (GLOBAL (compdf2)) + +GLOBAL (compdf2): + +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r5,r4 + mov r1,r5 + + mov r6,r1 + mov r7,r6 + mov r1,r7 +#endif + mov #0,r0 + mov.l .L_inf,r1 + + mov r1,r2 + and r4,r1 !r1 contains exp of op1 + + cmp/eq r1,r2 + mov r2,r3 + + bt .L_a_inv + and r6,r2 !r2 contains exp of op2 + + cmp/eq r2,r3 + mov.l .L_high_mant,r1 + + bt/s .L_b_inv + and r6,r1 + + mov.l .L_mask_sign,r1 + bf .L_normal_nos + +.L_a_inv: + !Check if op1 is NaN or Inf + mov.l .L_high_mant,r1 + mov r2,r3 !r3 contains Inf + + and r4,r1 + cmp/hi r0,r1 + + mov r0,r7 + and r6,r3 + + mov r8,r0 + bt .L_ret_nan !op1 NaN,return 1 + + cmp/hi r7,r5 + mov.l .L_mask_sign,r5 + + bt .L_ret_nan !op1 NaN,return 1 + and r4,r5 + + cmp/eq r2,r3 + mov.l .L_high_mant,r1 + + and r6,r1 + mov r2,r3 + + mov r7,r0 + bf/s .L_a_inf + mov #1,r2 + +.L_b_inv: + !Check if op2 is NaN or Inf + cmp/hi r0,r1 + mov r3,r2 + + mov r0,r5 + mov #1,r0 + + bt .L_ret_nan + and r4,r3 !r3 
contains exp of op1 + + cmp/hi r5,r7 + mov r0,r1 + + bt .L_ret_nan + mov r5,r0 + + mov.l .L_mask_sign,r5 + mov r1,r7 + + cmp/eq r3,r2 + mov r5,r1 + + bf/s .L_b_inf + and r6,r5 + +.L_a_inf_b_inf: + !If op1=op2=+Inf or op1=op2=-Inf,return 0 + and r4,r1 + cmp/eq r5,r1 + + bt .L_return !op1=op2=Inf,sign same,return 0 + mov #-1,r3 + +.L_sign_not_same: + !if sign of op1 and op2 are not same + cmp/hi r2,r5 !r2=sign of op1,r5=sign of op2 + mov r7,r0 + + bt .L_return + mov.l @r15+,r8 + + rts + mov r3,r0 + +.L_normal_nos: + !If op1 and op2 are normal numbers + mov r1,r2 + and r4,r1 + + bra .L_chk_zero + and r6,r2 + +.L_cmp_signs: + cmp/eq r1,r2 !Check if signs are same + mov.l .L_inf,r3 + + mov #-1,r0 + bt .L_cmp_exps + + cmp/hi r2,r1 + mov #1,r3 + + bt .L_return + mov.l @r15+,r8 + + rts + mov r3,r0 + +.L_b_inf: + !If op2=+Inf/-Inf and op1=normal number + cmp/hi r0,r5 + mov #-1,r3 + + bt/s .L_return + mov r7,r0 + mov.l @r15+,r8 + + rts + mov r3,r0 + +.L_return: + rts + mov.l @r15+,r8 + +.L_a_inf: + !If op1=+Inf/-Inf and op2=normal number + cmp/hi r0,r5 !r5 contains sign of op1 + mov #-1,r3 + + bt/s .L_return + mov r3,r0 + mov.l @r15+,r8 + + rts + mov r2,r0 + +.L_cmp_exps: + !sign of op1 and op2 are same,compare exps + mov r3,r1 !r3 contains Inf + mov r3,r2 + + and r4,r1 + mov.l .L_mask_sign,r3 + + and r6,r2 + mov #0,r0 + + cmp/eq r1,r2 !compare exps + and r4,r3 + + bt .L_cmp_mant + mov #-1,r5 + + cmp/hi r2,r1 + mov #1,r7 + + bt .L_a_greater + mov #-1,r1 + + cmp/hi r0,r3 !Chk sign of op1, op1=+ve, return -1 else return 1 + bt/s .L_return + mov r7,r0 + + mov.l @r15+,r8 + mov r1,r0 + + rts + nop + +.L_a_greater: + !Chk sign of op1, op1=+ve, return 1 else return -1 + cmp/hi r0,r3 + + bt/s .L_return + mov r5,r0 + mov.l @r15+,r8 + + rts + mov r7,r0 + +.L_cmp_mant: + !exps are equal, compare mantissas + mov.l .L_high_mant,r1 + mov r1,r2 + + and r4,r1 + + and r6,r2 + cmp/eq r1,r2 !compare higher mantissas + + bt .L_cmp_lower_mant + mov #-1,r5 + + cmp/hi r2,r1 + mov #1,r7 + + bt 
.L_a_greater + cmp/hi r0,r3 !Chk sign of op1, op1=+ve, return -1 else return 1 + + bt/s .L_return + mov r7,r0 + mov.l @r15+,r8 + + rts + mov r5,r0 + +.L_cmp_lower_mant: + !higher mantissas are equal, compare lower mantissas + cmp/eq r5,r7 + mov #1,r2 + + bt .L_return + mov #-1,r1 + + cmp/hi r7,r5 + + bt .L_a_mant_greater + cmp/hi r0,r3 !Chk sign of op1, op1=+ve, return -1 else return 1 + + bf/s .L_return + mov r1,r0 + mov.l @r15+,r8 + + rts + mov r2,r0 + +.L_ret_nan: + mov r8,r0 + + rts + mov.l @r15+,r8 + +.L_a_mant_greater: + !Chk sign of op1, op1=+ve, return 1 else return -1 + cmp/hi r0,r3 + + bt/s .L_return + mov r1,r0 + + mov.l @r15+,r8 + + rts + mov r2,r0 + +.L_chk_zero: + cmp/eq r0,r7 + bf .L_cmp_signs + + cmp/eq r0,r5 + bf .L_cmp_signs + + shll r4 + shlr r4 + + cmp/eq r0,r4 + or r1,r4 + + bf .L_cmp_signs + shll r6 + + shlr r6 + cmp/eq r0,r6 + + or r2,r6 + bf .L_cmp_signs + + rts + mov.l @r15+,r8 + + .align 2 + +.L_high_mant: + .long 0x000fffff +.L_mask_sign: + .long 0x80000000 +.L_inf: + .long 0x7ff00000 + +ENDFUNC (GLOBAL (compdf2)) +ENDFUNC (GLOBAL (eqdf2)) +ENDFUNC (GLOBAL (nedf2)) +ENDFUNC (GLOBAL (ledf2)) +ENDFUNC (GLOBAL (ltdf2)) +ENDFUNC (GLOBAL (gedf2)) +ENDFUNC (GLOBAL (gtdf2)) Index: gcc/config/sh/IEEE-754/DF/divdf3.s =================================================================== RCS file: divdf3.s diff -N divdf3.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvswjmbKk Wed Aug 4 19:58:21 2004 @@ -0,0 +1,604 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!division of two double precision floating point numbers +!Author:Aanchal Khanna +! +!Entry: +!r4,r5:dividend +! +!r6,r7:divisor +! +!Exit: +!r0,r1:quotient + +!Notes: dividend is passed in regs r4 and r5 and divisor is passed in regs +!r6 and r7, quotient is returned in regs r0 and r1. dividend is referred as op1 +!and divisor as op2. +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + .text + .align 5 + .global GLOBAL (divdf3) + FUNC (GLOBAL (divdf3)) + +GLOBAL (divdf3): + +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r5,r4 + mov r1,r5 + + mov r6,r1 + mov r7,r6 + mov r1,r7 +#endif + mov r4,r2 + mov.l .L_inf,r1 + + and r1,r2 + mov.l r8,@-r15 + + cmp/eq r1,r2 + mov r6,r8 + + bt .L_a_inv + and r1,r8 + + cmp/eq r1,r8 + mov.l .L_high_mant,r3 + + bf .L_chk_zero + and r6,r3 + + mov.l .L_mask_sign,r8 + cmp/pl r7 + + mov r8,r0 + bt .L_ret_b !op2=NaN,return op2 + + and r4,r8 + cmp/pl r3 + + and r6,r0 + bt .L_ret_b !op2=NaN,return op2 + + xor r8,r0 !op1=normal no,op2=Inf, return Zero + mov #0,r1 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_ret_b: + mov r7,r1 + mov r6,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + + rts + mov.l @r15+,r8 + +.L_a_inv: + !chk if op1 is Inf or NaN + mov.l .L_high_mant,r2 + cmp/pl r5 + + and r4,r2 + bt .L_ret_a + + and r1,r8 !r1 contains infinity + cmp/pl r2 + + mov.l .L_mask_sign,r0 + bt .L_ret_a + + and r0,r6 + cmp/eq r1,r8 + + add #1,r5 + bt .L_ret_a + + and r4,r0 + mov.l @r15+,r8 + + shll r4 + xor r6,r0 !r0 contains the resultant sign bit + + mov #0,r1 + shlr r4 + +#ifdef __LITTLE_ENDIAN__ + mov r1,r2 + mov r0,r1 + mov r2,r0 +#endif + rts + +#ifdef __LITTLE_ENDIAN__ + or r4,r1 +#else + or r4,r0 +#endif + +.L_ret_a: + !return op1 + mov r5,r1 + mov r4,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_chk_zero: + !chk if op1=0 + mov.l .L_mask_sign,r0 + mov r4,r3 + + and r0,r3 + shll r4 + + and r6,r0 + shlr r4 + + xor r3,r0 + shll r6 + + shlr r6 + tst r4,r4 + + + bf .L_op1_not_zero + tst r5,r5 + + bf .L_op1_not_zero + tst r7,r7 + + mov.l @r15+,r8 + bf .L_ret_zero + + tst r6,r6 + bf .L_ret_zero + + mov r1,r0 !op1=op2=0, return NaN + mov #1,r1 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + nop + +.L_ret_zero: + !return zero + mov r0,r1 + rts +#ifdef 
__LITTLE_ENDIAN__
+	mov	#0,r0
+#else
+	mov	#0,r1	!op1=0,op2=normal no,return zero
+#endif
+
+.L_norm_b:
+	!normalize op2
+	shll	r7
+	mov.l	.L_imp_bit,r3
+
+	rotcl	r6
+	tst	r3,r6
+
+	add	#-1,r8
+	bt	.L_norm_b
+
+	bra	.L_divide
+	add	#1,r8
+
+.L_op1_not_zero:
+	!op1!=0, chk if op2=0
+	tst	r7,r7
+	mov	r1,r3
+
+	mov	#0,r1
+	bf	.L_normal_nos
+
+	tst	r6,r6
+	bf	.L_normal_nos
+
+	mov.l	@r15+,r8
+	or	r3,r0
+
+#ifdef __LITTLE_ENDIAN__
+	mov	r0,r2
+	mov	r1,r0
+	mov	r2,r1
+#endif
+
+	rts
+	nop
+
+.L_normal_nos:
+	!op1 and op2 are normal nos
+	tst	r2,r2
+	mov	#-20,r1
+
+! The subsequent branch is for the upper compare
+! Shifting will not alter the result, for the
+! macro is declared with care.
+#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__)
+	shld	r1,r2
+#else
+	SHLR20 (r2)
+#endif
+	bt	.L_norm_a	!normalize dividend
+
+.L_chk_b:
+	mov.l	r9,@-r15
+	tst	r8,r8
+
+	mov.l	.L_high_mant,r9
+
+! The subsequent branch is for the upper compare
+! Shifting will not alter the result, for the
+! macro is declared with care.
+#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r1,r8 +#else + SHLR20 (r8) +#endif + bt/s .L_norm_b !normalize divisor + and r9,r4 + +.L_divide: + mov.l .L_2047,r1 + sub r8,r2 + + mov.l .L_1023,r8 + and r9,r6 + + !resultant exponent + add r8,r2 + !chk the exponent for overflow + cmp/ge r1,r2 + + mov.l .L_imp_bit,r1 + bt .L_overflow + + mov #0,r8 + or r1,r4 + + or r1,r6 + mov #-24,r3 + + !chk if the divisor is 1(mantissa only) + cmp/eq r8,r7 + bf .L_div2 + + cmp/eq r6,r1 + bt .L_den_one + +.L_div2: + !divide the mantissas + shll8 r4 + mov r5,r9 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r3,r9 +#else + SHLR24 (r9) +#endif + shll8 r6 + + or r9,r4 + shll8 r5 + + mov r7,r9 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r3,r9 +#else + SHLR24 (r9) +#endif + mov r8,r3 + shll8 r7 + + or r9,r6 + cmp/gt r4,r6 + + mov r3,r9 + bt .L_shift + + cmp/eq r4,r6 + bf .L_loop + + cmp/gt r5,r7 + bf .L_loop + +.L_shift: + add #-1,r2 + shll r5 + rotcl r4 + +.L_loop: + !actual division loop + cmp/gt r6,r4 + bt .L_subtract + + cmp/eq r6,r4 + bf .L_skip + + cmp/ge r7,r5 + bf .L_skip + +.L_subtract: + clrt + subc r7,r5 + + or r1,r8 + subc r6,r4 + +.L_skip: + shlr r1 + shll r5 + + rotcl r4 + cmp/eq r1,r3 + + bf .L_loop + mov.l .L_imp_bit,r1 + + !chk if the divison was for the higher word of the quotient + tst r1,r9 + bf .L_chk_exp + + mov r8,r9 + mov.l .L_mask_sign,r1 + + !divide for the lower word of the quotient + bra .L_loop + mov r3,r8 + +.L_chk_exp: + !chk if the result needs to be denormalized + cmp/gt r2,r3 + bf .L_round + mov #-53,r7 + +.L_underflow: + !denormalize the result + add #1,r2 + cmp/gt r2,r7 + + or r4,r5 !remainder + add #-2,r2 + + mov #32,r4 + bt .L_return_zero + + add r2,r4 + cmp/ge r3,r4 + + mov r2,r7 + mov r3,r1 + + mov #-54,r2 + bt .L_denorm + mov #-32,r7 + +.L_denorm: + shlr r8 + rotcr r1 + + shll r8 + add #1,r7 + + shlr r9 + rotcr r8 + + cmp/eq r3,r7 + bf .L_denorm + + mov 
r4,r7 + cmp/eq r2,r4 + + bt .L_break + mov r3,r6 + + cmp/gt r7,r3 + bf .L_break + + mov r2,r4 + mov r1,r6 + + mov r3,r1 + bt .L_denorm + +.L_break: + mov #-31,r2 + mov r1,r7 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r7 +#else + SHLR31 (r7) +#endif + clrt + + addc r7,r8 + mov.l .L_comp_1,r4 + + addc r3,r9 + or r9,r0 + + cmp/eq r5,r3 + bf .L_return + + cmp/eq r3,r6 + mov.l .L_mask_sign,r7 + + bf .L_return + cmp/eq r7,r1 + + bf .L_return + and r4,r8 + +.L_return: + mov.l @r15+,r9 + mov r8,r1 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_norm_a: + !normalize op1 + shll r5 + mov.l .L_imp_bit,r3 + + rotcl r4 + tst r3,r4 + + add #-1,r2 + bt .L_norm_a + + bra .L_chk_b + add #1,r2 + +.L_overflow: + !overflow, return inf + mov.l .L_inf,r2 +#ifdef __LITTLE_ENDIAN__ + or r2,r1 + mov #0,r0 +#else + or r2,r0 + mov #0,r1 +#endif + mov.l @r15+,r9 + rts + mov.l @r15+,r8 + +.L_den_one: + !denominator=1, result=numerator + mov r4,r9 + mov #-53,r7 + + cmp/ge r2,r8 + mov r8,r4 + + mov r5,r8 + mov r4,r3 + + !chk the exponent for underflow + bt/s .L_underflow + mov r4,r5 + + mov.l .L_high_mant,r7 + bra .L_pack + mov #20,r6 + +.L_return_zero: + !return zero + mov r3,r1 + mov.l @r15+,r9 + + rts + mov.l @r15+,r8 + +.L_round: + !apply rounding + cmp/eq r4,r6 + bt .L_lower + + clrt + subc r6,r4 + + bra .L_rounding + mov r4,r6 + +.L_lower: + clrt + subc r7,r5 + mov r5,r6 + +.L_rounding: + !apply rounding + mov.l .L_invert,r1 + mov r3,r4 + + movt r3 + clrt + + not r3,r3 + and r1,r3 + + addc r3,r8 + mov.l .L_high_mant,r7 + + addc r4,r9 + cmp/eq r4,r6 + + mov.l .L_comp_1,r3 + bf/s .L_pack + mov #20,r6 + and r3,r8 + +.L_pack: + !pack the result, r2=exponent,r0=sign,r8=lower mantissa, r9=higher mantissa +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r6,r2 +#else + SHLL20 (r2) +#endif + and r7,r9 + + or r2,r0 + mov r8,r1 + + or r9,r0 + mov.l @r15+,r9 + +#ifdef __LITTLE_ENDIAN__ 
+ mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + + .align 2 + +.L_mask_sign: + .long 0x80000000 +.L_high_mant: + .long 0x000fffff +.L_inf: + .long 0x7ff00000 +.L_1023: + .long 1023 +.L_2047: + .long 2047 +.L_imp_bit: + .long 0x00100000 +.L_comp_1: + .long 0xfffffffe +.L_invert: + .long 0x00000001 + +ENDFUNC (GLOBAL (divdf3)) Index: gcc/config/sh/IEEE-754/DF/extendsfdf2.s =================================================================== RCS file: extendsfdf2.s diff -N extendsfdf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsIfRXvj Wed Aug 4 19:58:21 2004 @@ -0,0 +1,171 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Routine for extending float to double + +! Author: Rakesh Kumar + +! Argument: r4 +! Result: r0 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (extendsfdf2) + FUNC (GLOBAL (extendsfdf2)) + + +GLOBAL (extendsfdf2): + mov #0,r1 + mov.l .L_inf,r2 + + mov.l .L_sign,r6 + mov r2,r7 + + and r4,r6 + mov.l .L_mant,r0 + + ! Mask out the exponent + and r4,r2 + mov.l .L_mask_sbit,r3 + + ! Trap any NaN, Inf + and r3,r4 + cmp/eq r2,r7 + + ! Get mantissa + mov #-23,r3 + bt/s .L_inv_exp + and r4,r0 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r3,r2 +#else + SHLR23 (r2) +#endif + + ! Compare the number against zero + cmp/eq r1,r4 + bt .L_ret_zero + + ! Check for denormalized number + cmp/eq r1,r2 + bt .L_normalize + +.L_extend: + mov #0,r1 + + shlr r0 + mov.l .L_exp_bias,r3 + + ! Bias the exponent + rotcr r1 + add r3,r2 + + shlr r0 + mov #20,r3 + + rotcr r1 +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r3,r2 +#else + SHLL20 (r2) +#endif + + ! 
Position the pieces in proper place + shlr r0 + rotcr r1 + + or r2,r0 + or r6,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + rts + mov r2,r1 +#else + rts + nop +#endif + +.L_ret_zero: + mov #0,r1 + mov r4,r0 + +#ifdef __LITTLE_ENDIAN__ + rts + or r6,r1 +#else + rts + or r6,r0 +#endif + +.L_inv_exp: + cmp/eq r1,r0 + + ! Put invalid exponent with sign +#ifdef __LITTLE_ENDIAN__ + mov.l .L_inf_exp,r1 + bf/s .L_return_NaN + or r6,r1 +#else + mov.l .L_inf_exp,r0 + bf/s .L_return_NaN + or r6,r0 +#endif + + rts + nop + +.L_return_NaN: + rts +#ifdef __LITTLE_ENDIAN__ + mov #1,r0 +#else + mov #1,r1 +#endif + +.L_normalize: + mov.l .L_24_bit,r7 + +.L_1: + mov r7,r5 + shll r0 + + add #-1,r2 + + and r0,r5 + cmp/eq r1,r5 + + bt .L_1 + + not r7,r3 + + and r3,r0 + bra .L_extend + add #1,r2 ! Bias of denormalize number is 126 + + + .align 2 + +.L_exp_bias: + .long 0x00000380 + +.L_sign: + .long 0x80000000 + +.L_inf_exp: + .long 0x7FF00000 + +.L_mant: + .long 0x007FFFFF + +.L_inf: + .long 0x7F800000 + +.L_24_bit: + .long 0x00800000 + +.L_mask_sbit: + .long 0x7FFFFFFF + +ENDFUNC (GLOBAL (extendsfdf2)) Index: gcc/config/sh/IEEE-754/DF/fixdfsi.s =================================================================== RCS file: fixdfsi.s diff -N fixdfsi.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsKpAQhi Wed Aug 4 19:58:21 2004 @@ -0,0 +1,174 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!conversion of double precision floating point number to signed integer +!Author:Aanchal Khanna +! +!Entry: +!r4,r5:operand +! +!Exit: +!r0:result +! +!Note:argument is passed in regs r4 and r5, the result is returned in +!reg r0. +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + .text + .align 5 + .global GLOBAL (fixdfsi) + FUNC (GLOBAL (fixdfsi)) + +GLOBAL (fixdfsi): + +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r5,r4 + mov r1,r5 + +#endif + mov.l .L_p_inf,r2 + mov #-20,r1 + + mov r2,r7 + mov.l .L_1023,r3 + + and r4,r2 + shll r4 + + movt r6 ! r6 contains the sign bit + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r1,r2 ! r2 contains the exponent +#else + SHLR20 (r2) +#endif + shlr r4 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r1,r7 +#else + SHLR20 (r7) +#endif + cmp/hi r2,r3 ! if exp < 1023,return 0 + mov.l .L_mask_high_mant,r1 + + bt/s .L_epil + mov #0,r0 + and r4,r1 ! r1 contains high mantissa + + cmp/eq r2,r7 ! chk if exp is invalid + mov.l .L_1053,r7 + + bt .L_inv_exp + mov #11,r0 + + cmp/hi r7,r2 ! If exp > 1053,return maxint + sub r2,r7 + + mov.l .L_21bit,r2 + bt/s .L_ret_max + add #1,r7 ! r7 contains the number of shifts + + or r2,r1 + mov r7,r3 + shll8 r1 + + neg r7,r7 + shll2 r1 + + shll r1 + cmp/hi r3,r0 + + !chk if the result can be made only from higher mantissa + bt/s .L_lower_mantissa + mov #21,r0 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r1 +#else +.L_loop: + tst r7,r7 + bt .L_break1 + add #1,r7 + bra .L_loop + shlr r1 + +.L_break1: +#endif + tst r6,r6 + bt/s .L_epil + mov r1,r0 + + rts + neg r0,r0 + +.L_lower_mantissa: + !result is made from lower mantissa also + neg r0,r0 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r0,r5 +#else + SHLR21 (r5) +#endif + + or r5,r1 !pack lower and higher mantissas + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r1 +#else +.L_sh_loop: + tst r7,r7 + bt .L_break + add #1,r7 + bra .L_sh_loop + shlr r1 + +.L_break: +#endif + mov r1,r0 + bra .L_chk_sign + nop + +.L_epil: + rts + nop + +.L_inv_exp: + cmp/hi r0,r5 + bt .L_epil + + cmp/hi r0,r1 !compare high mantissa,r1 + bt .L_epil + +.L_ret_max: + mov.l .L_maxint,r0 + tst 
r6,r6 + bt .L_epil + + rts + add #1,r0 + +.L_chk_sign: + tst r6,r6 !sign bit is set, number is -ve + bt .L_epil + + rts + neg r0,r0 + + .align 2 + +.L_maxint: + .long 0x7fffffff +.L_p_inf: + .long 0x7ff00000 +.L_mask_high_mant: + .long 0x000fffff +.L_1023: + .long 0x000003ff +.L_1053: + .long 1053 +.L_21bit: + .long 0x00100000 + +ENDFUNC (GLOBAL (fixdfsi)) Index: gcc/config/sh/IEEE-754/DF/fixunsdfsi.s =================================================================== RCS file: fixunsdfsi.s diff -N fixunsdfsi.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsom6U3g Wed Aug 4 19:58:21 2004 @@ -0,0 +1,155 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!conversion of double precision floating point number to unsigned integer +!Author:Aanchal Khanna +! +!Entry: +!r4,r5:operand +! +!Exit: +!r0:result +! +!Note:argument is passed in regs r4 and r5, the result is returned in +!reg r0. +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (fixunsdfsi) + FUNC (GLOBAL (fixunsdfsi)) + +GLOBAL (fixunsdfsi): + +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r5,r4 + mov r1,r5 +#endif + mov.l .L_p_inf,r2 + mov #-20,r1 + + mov r2,r7 + mov.l .L_1023,r3 + + and r4,r2 + shll r4 + + movt r6 ! r6 contains the sign bit +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r1,r2 ! r2 contains the exponent +#else + SHLR20 (r2) +#endif + shlr r4 +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r1,r7 +#else + SHLR20 (r7) +#endif + tst r6,r6 + bf/s .L_epil + mov #0,r0 + + cmp/hi r2,r3 ! if exp < 1023,return 0 + mov.l .L_high_mant,r1 + + bt/s .L_epil + and r4,r1 ! r1 contains high mantissa + + cmp/eq r2,r7 ! chk if exp is invalid + mov.l .L_1054,r7 + + bt .L_inv_exp + mov #11,r0 + + cmp/hi r7,r2 ! 
If exp > 1054,return maxint + sub r2,r7 !r7 contains the number of shifts + + mov.l .L_21bit,r2 + bt .L_ret_max + + or r2,r1 + mov r7,r3 + + shll8 r1 + neg r7,r7 + + shll2 r1 + + shll r1 + cmp/hi r3,r0 + + bt/s .L_lower_mant + mov #21,r0 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r1 +#else +.L_sh_loop: + tst r7,r7 + bt .L_break + add #1,r7 + bra .L_sh_loop + shlr r1 + +.L_break: +#endif + rts + mov r1,r0 + +.L_lower_mant: + neg r0,r0 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r0,r5 +#else + SHLR21 (r5) +#endif + or r5,r1 !pack lower and higher mantissas + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r1 +#else +.L_loop: + tst r7,r7 + bt .L_break1 + add #1,r7 + bra .L_loop + shlr r1 + +.L_break1: +#endif + mov r1,r0 +.L_epil: + rts + nop + +.L_inv_exp: + cmp/hi r0,r5 + bt .L_epil + + cmp/hi r0,r1 !compare high mantissa,r1 + bt .L_epil + +.L_ret_max: + mov.l .L_maxint,r0 + + rts + nop + + .align 2 + +.L_maxint: + .long 0xffffffff +.L_p_inf: + .long 0x7ff00000 +.L_high_mant: + .long 0x000fffff +.L_1023: + .long 0x000003ff +.L_1054: + .long 1054 +.L_21bit: + .long 0x00100000 + +ENDFUNC (GLOBAL (fixunsdfsi)) Index: gcc/config/sh/IEEE-754/DF/floatsidf.s =================================================================== RCS file: floatsidf.s diff -N floatsidf.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsCoB6Pf Wed Aug 4 19:58:21 2004 @@ -0,0 +1,125 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!conversion of signed integer to double precision floating point number +!Author:Rakesh Kumar +! +!Entry: +!r4:operand +! +!Exit: +!r0,r1:result +! +!Note:argument is passed in reg r4 and the result is returned in +!regs r0 and r1. +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + .text + .align 5 + .global GLOBAL (floatsidf) + FUNC (GLOBAL (floatsidf)) + +GLOBAL (floatsidf): + mov.l .L_sign,r0 + mov #0,r1 + + mov r0,r2 + cmp/eq r1,r4 + + ! Extract the sign + mov r2,r3 + bt/s .L_ret_zero ! Zero found + and r4,r0 + + cmp/eq r1,r0 + not r3,r3 + + mov r1,r7 + bt/s .L_loop + and r4,r3 + + ! Treat -2147483648 as special case + cmp/eq r1,r3 + neg r4,r4 + + bt .L_ret_min + +.L_loop: + shll r4 + mov r4,r5 + + and r2,r5 + cmp/eq r1,r5 + + add #1,r7 + bt .L_loop + + mov.l .L_initial_exp,r6 + not r2,r2 + + and r2,r4 + mov #21,r3 + + sub r7,r6 + mov r4,r1 + + mov #20,r7 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r3,r1 +#else + SHLL21 (r1) +#endif + mov #-11,r2 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r6 ! Exponent in proper place +#else + SHLL20 (r6) +#endif + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r4 +#else + SHLR11 (r4) +#endif + or r6,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts +#ifdef __LITTLE_ENDIAN__ + or r4,r1 +#else + or r4,r0 +#endif + +.L_ret_zero: + rts + mov #0,r0 + +.L_ret_min: + mov.l .L_min,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + nop + + .align 2 + +.L_initial_exp: + .long 0x0000041E + +.L_sign: + .long 0x80000000 + +.L_min: + .long 0xC1E00000 + +ENDFUNC (GLOBAL (floatsidf)) Index: gcc/config/sh/IEEE-754/DF/floatunssidf.s =================================================================== RCS file: floatunssidf.s diff -N floatunssidf.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsy5XmCe Wed Aug 4 19:58:21 2004 @@ -0,0 +1,103 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!conversion of unsigned integer to double precision floating point number +!Author:Rakesh Kumar +! +!Entry: +!r4:operand +! +!Exit: +!r0,r1:result +! +!Note:argument is passed in reg r4 and the result is returned in +!regs r0 and r1. +! 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (floatunssidf) + FUNC (GLOBAL (floatunssidf)) + +GLOBAL (floatunssidf): + mov.l .L_sign,r0 + mov #0,r1 + + mov r0,r2 + cmp/eq r1,r4 + + ! Extract the MSB + bt/s .L_ret_zero ! Zero found + and r4,r0 + + cmp/eq r1,r0 + + bf/s .L_epil + mov r1,r7 + +.L_loop: + shll r4 + mov r4,r5 + + and r2,r5 + cmp/eq r1,r5 + + add #1,r7 + bt .L_loop + +.L_epil: + mov.l .L_init_exp,r6 + not r2,r2 + + and r2,r4 + mov #21,r3 + + sub r7,r6 + mov r4,r1 + + mov #20,r7 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r3,r1 +#else + SHLL21 (r1) +#endif + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r6 ! Exponent in proper place +#else + SHLL20 (r6) +#endif + mov #-11,r2 + mov r6,r0 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r4 +#else + SHLR11 (r4) +#endif + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts +#ifdef __LITTLE_ENDIAN__ + or r4,r1 +#else + or r4,r0 +#endif + +.L_ret_zero: + rts + mov #0,r0 + + .align 2 + +.L_init_exp: + .long 0x0000041E + +.L_sign: + .long 0x80000000 + +ENDFUNC (GLOBAL (floatunssidf)) Index: gcc/config/sh/IEEE-754/DF/muldf3.s =================================================================== RCS file: muldf3.s diff -N muldf3.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsK4IHod Wed Aug 4 19:58:21 2004 @@ -0,0 +1,639 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!multiplication of two double precision floating point numbers +!Author:Aanchal Khanna +! +!Entry: +!r4,r5:operand 1 +! +!r6,r7:operand 2 +! +!Exit: +!r0,r1:result +! +!Notes: argument 1 is passed in regs r4 and r5 and argument 2 is passed in regs +!r6 and r7, result is returned in regs r0 and r1. operand 1 is referred as op1 +!and operand 2 as op2. +! 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + .text + .align 5 + .global GLOBAL (muldf3) + FUNC (GLOBAL (muldf3)) + +GLOBAL (muldf3): + +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r5,r4 + mov r1,r5 + + mov r6,r1 + mov r7,r6 + mov r1,r7 +#endif + mov.l .L_mask_sign,r0 + mov r4,r2 + + and r0,r2 + mov #0,r1 + + shll r4 + and r6,r0 + + xor r2,r0 !r0 contains the result's sign bit + shlr r4 + + mov.l .L_inf,r2 + shll r6 + + mov r4,r3 + shlr r6 + +.L_chk_a_inv: + !chk if op1 is Inf/NaN + and r2,r3 + mov.l r8,@-r15 + + cmp/eq r3,r2 + mov.l .L_mask_high_mant,r8 + + mov r2,r3 + bf .L_chk_b_inv + + mov r8,r3 + and r4,r8 + + cmp/hi r1,r8 + bt .L_return_a !op1 NaN, return op1 + + cmp/hi r1,r5 + mov r2,r8 + + bt .L_return_a !op1 NaN, return op1 + and r6,r8 + + cmp/eq r8,r2 + and r6,r3 + + bt .L_b_inv + cmp/eq r1,r6 + + bf .L_return_a !op1 Inf,op2= normal no return op1 + cmp/eq r1,r7 + + bf .L_return_a !op1 Inf,op2= normal no return op1 + add #1,r5 + + mov r4,r0 + mov r5,r1 !op1=Inf, op2=0,return nan + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_b_inv: + !op2 is NaN/Inf + cmp/hi r1,r7 + mov r1,r2 + + mov r5,r1 + bt .L_return_b !op2=NaN,return op2 + + cmp/hi r2,r6 + or r4,r0 + + bt .L_return_b !op2=NaN,return op2 + mov.l @r15+,r8 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + + rts !op1=Inf,op2=Inf,return Inf with sign + nop + +.L_chk_b_inv: + !Chk if op2 is NaN/Inf + and r6,r2 + cmp/eq r3,r2 + + bf .L_chk_a_for_zero + and r6,r8 + + cmp/hi r1,r8 + bt .L_return_b !op2=NaN,return op2 + + cmp/hi r1,r7 + bt .L_return_b !op2=NaN,return op2 + + cmp/eq r5,r1 + bf .L_return_b !op1=normal number,op2=Inf,return Inf + + mov r7,r1 + cmp/eq r4,r1 + + bf/s .L_return_b !op1=normal number, op2=Inf,return Inf + add #1,r1 + mov r6,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_return_a: + mov r5,r1 + or r4,r0 + +#ifdef 
__LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + + rts + mov.l @r15+,r8 + +.L_return_b: + mov r7,r1 + or r6,r0 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + + rts + mov.l @r15+,r8 + +.L_chk_a_for_zero: + !Chk if op1 is zero + cmp/eq r1,r4 + bf .L_chk_b_for_zero + + cmp/eq r1,r5 + bf .L_chk_b_for_zero + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_chk_b_for_zero: + !op1=0,chk if op2 is zero + cmp/eq r1,r6 + mov r1,r3 + + mov.l .L_inf,r1 + bf .L_normal_nos + + cmp/eq r3,r7 + bf .L_normal_nos + + mov r3,r1 + mov.l @r15+,r8 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + nop + +.L_normal_nos: + !op1 and op2 are normal nos + mov.l r9,@-r15 + mov r4,r3 + + mov #-20,r9 + and r1,r3 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r9,r2 +#else + SHLR20 (r2) +#endif + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r9,r3 +#else + SHLR20 (r3) +#endif + cmp/pl r3 + + bf .L_norm_a !normalize op1 +.L_chk_b: + cmp/pl r2 + bf .L_norm_b !normalize op2 + +.L_mul1: + add r3,r2 + mov.l .L_1023,r1 + + !resultant exponent in r2 + add r1,r2 + mov.l .L_2047,r1 + + !Chk the exponent for overflow + cmp/ge r1,r2 + and r8,r4 + + bt .L_return_inf + mov.l .L_imp_bit,r1 + + or r1,r4 + and r8,r6 + + or r1,r6 + clrt + + !multiplying the mantissas + dmulu.l r7,r5 + + sts macl,r1 !bits 0-31 of product + + sts mach,r3 + + dmulu.l r4,r7 + + sts macl,r8 + + addc r3,r8 + + sts mach,r3 + + dmulu.l r5,r6 + + sts macl,r7 + + movt r9 + clrt + + addc r7,r8 !bits 63-32 of product + + movt r7 + add r7,r9 + + sts mach,r7 + + add r7,r3 + + add r9,r3 + + dmulu.l r4,r6 + + sts macl,r7 + + clrt + addc r7,r3 !bits 64-95 of product + + sts mach,r7 + + mov #0,r5 + addc r5,r7 !bits 96-105 of product + + cmp/eq r5,r1 + mov #1,r4 + + bt .L_skip + or r4,r8 +.L_skip: + mov.l .L_106_bit,r4 + mov r8,r9 + +.L_chk_extra_msb: + !chk if exra MSB 
is generated + and r7,r4 + cmp/eq r5,r4 + + mov #12,r4 + bf/s .L_shift_rt_by_1 + mov #31,r5 + +.L_pack_mantissa: + !scale the mantissa t0 53 bits + mov #-19,r6 + mov.l .L_mask_high_mant,r5 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r6,r8 +#else + SHLR19 (r8) +#endif + and r3,r5 + + shlr r8 + movt r1 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r4,r5 +#else + SHLL12 (r5) +#endif + add #-1,r6 + + or r5,r8 !lower bits of resulting mantissa +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r6,r3 +#else + SHLR20 (r3) +#endif + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r4,r7 +#else + SHLL12 (r7) +#endif + clrt + + or r7,r3 !higher bits of resulting mantissa + mov #0,r7 + + !chk the exponent for underflow + cmp/ge r2,r7 + bt .L_underflow + + addc r1,r8 !rounding + mov r8,r1 + + addc r7,r3 !rounding + mov.l .L_mask_22_bit,r5 + + and r3,r5 + !chk if extra msb is generated after rounding + cmp/eq r7,r5 + + mov.l .L_mask_high_mant,r8 + bt .L_pack_result + + add #1,r2 + mov.l .L_2047,r6 + + cmp/ge r6,r2 + shlr r1 + + bt .L_return_inf + shlr r3 + + movt r6 + mov #31,r5 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r5,r6 +#else + SHLL31 (r6) +#endif + or r6,r1 + +.L_pack_result: + !pack the result, r2=exponent, r3=higher mantissa, r1=lower mantissa + !r0=sign bit + mov #20,r6 + and r8,r3 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r6,r2 +#else + SHLL20 (r2) +#endif + or r3,r0 + + or r2,r0 + mov.l @r15+,r9 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_norm_a: + !normalize op1 + shll r5 + mov.l .L_imp_bit,r1 + + rotcl r4 + add #-1,r3 + + tst r1,r4 + bt .L_norm_a + + bra .L_chk_b + add #1,r3 + +.L_norm_b: + !normalize op2 + shll r7 + mov.l .L_imp_bit,r1 + + rotcl r6 + add #-1,r2 + + tst r1,r6 + bt .L_norm_b + + bra .L_mul1 + add #1,r2 + 
+.L_shift_rt_by_1: + !adjust the extra msb + + add #1,r2 !add 1 to exponent + mov.l .L_2047,r6 + + cmp/ge r6,r2 + mov #20,r6 + + bt .L_return_inf + shlr r8 !r8 contains bit 32-63 of product + + movt r6 + shlr r1 !r1 contains bit 31-0 of product + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r5,r6 +#else + SHLL31 (r6) +#endif + shlr r3 !r3 contains bit 64-95 of product + + or r6,r1 + movt r6 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r5,r6 +#else + SHLL31 (r6) +#endif + or r6,r8 + + shlr r7 !r7 contains bit 96-105 of product + movt r6 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r5,r6 +#else + SHLL31 (r6) +#endif + bra .L_pack_mantissa + or r6,r3 + +.L_return_inf: + !return Inf + mov.l .L_inf,r2 + mov #0,r1 + + or r2,r0 + mov.l @r15+,r9 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_underflow: + !check if the result needs to be denormalized + mov #-53,r1 + add #1,r2 + + cmp/gt r2,r1 + mov #32,r4 + + add #-2,r2 + bt .L_return_zero + + add r2,r4 + mov r7,r1 + + cmp/ge r7,r4 + mov r2,r6 + + mov #-54,r2 + bt .L_denorm + + mov #-32,r6 + +.L_denorm: + !denormalize the result + shlr r8 + rotcr r1 + + shll r8 + add #1,r6 + + shlr r3 + rotcr r8 + + cmp/eq r7,r6 + bf .L_denorm + + mov r4,r6 + cmp/eq r2,r4 + + bt .L_break + mov r7,r5 + + cmp/gt r6,r7 + bf .L_break + + mov r2,r4 + mov r1,r5 + + mov r7,r1 + bt .L_denorm + +.L_break: + mov #-31,r2 + mov r1,r6 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r2,r6 +#else + SHLR31 (r6) +#endif + clrt + + addc r6,r8 + mov.l .L_comp_1,r4 + + addc r7,r3 + or r3,r0 + + cmp/eq r9,r7 + bf .L_return + + cmp/eq r7,r5 + mov.l .L_mask_sign,r6 + + bf .L_return + cmp/eq r1,r6 + + bf .L_return + and r4,r8 + +.L_return: + mov.l @r15+,r9 + mov r8,r1 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + rts + mov.l @r15+,r8 + +.L_return_zero: + 
mov.l @r15+,r9 + mov r7,r1 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + + rts + mov.l @r15+,r8 + + .align 2 + +.L_mask_high_mant: + .long 0x000fffff +.L_inf: + .long 0x7ff00000 +.L_mask_sign: + .long 0x80000000 +.L_1023: + .long -1023 +.L_2047: + .long 2047 +.L_imp_bit: + .long 0x00100000 +.L_mask_22_bit: + .long 0x00200000 +.L_106_bit: + .long 0x00000200 +.L_comp_1: + .long 0xfffffffe + +ENDFUNC (GLOBAL (muldf3)) Index: gcc/config/sh/IEEE-754/DF/negdf2.s =================================================================== RCS file: negdf2.s diff -N negdf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsCwOsbc Wed Aug 4 19:58:21 2004 @@ -0,0 +1,47 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!negation of given double precision floating point number +!Author:Rakesh Kumar +! +!Entry: +!r4,r5:operand +! +!Exit: +!r0,r1:result +! +!Note:argument is passed in regs r4 and r5 and the result is returned in +!regs r0 and r1. +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (negdf2) + FUNC (GLOBAL (negdf2)) + +GLOBAL (negdf2): + +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r5,r4 + mov r1,r5 +#endif + mov.l .L_sign_bit,r2 + mov r4,r0 + + xor r2,r0 + mov r5,r1 + +#ifdef __LITTLE_ENDIAN__ + mov r0,r2 + mov r1,r0 + mov r2,r1 +#endif + + rts + nop + + .align 2 +.L_sign_bit: + .long 0x80000000 + +ENDFUNC (GLOBAL (negdf2)) Index: gcc/config/sh/IEEE-754/DF/truncdfsf2.s =================================================================== RCS file: truncdfsf2.s diff -N truncdfsf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsMOtgYa Wed Aug 4 19:58:21 2004 @@ -0,0 +1,242 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Routine for conversion from double to float + +! Author: Rakesh Kumar + +! Argument: r4-r5 +! Result: r0 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + .text + .align 5 + .global GLOBAL (truncdfsf2) + FUNC (GLOBAL (truncdfsf2)) + +GLOBAL (truncdfsf2): +#ifdef __LITTLE_ENDIAN__ + mov r4,r2 +#endif + mov #0,r1 +#ifdef __LITTLE_ENDIAN__ + mov r5,r4 + mov r2,r5 +#endif + mov.l .L_sign,r0 + + ! Extract the sign in r0 + mov.l .L_mask_sign,r3 + and r4,r0 + + ! Mask out the sign bit + mov.l .L_inf,r2 + and r3,r4 + + ! Extract exponent + cmp/eq r1,r4 + mov r2,r6 + + mov.l .L_max_exp,r3 + bt/s .L_ret_zero + and r4,r2 + + cmp/eq r6,r2 + mov #-20,r7 + + bt .L_inv_exp +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r2 +#else + SHLR20 (r2) +#endif + + ! r3 has maximum exponent that can be + ! biased for an SFmode float + mov.l .L_min_exp,r7 + cmp/gt r3,r2 + + mov.l .L_min_exp,r3 + bt/s .L_ret_inf + sub r3,r2 ! r2 has exponent + + mov.l .L_high_mant,r6 + mov #23,r7 + + ! Exponent is large enough ?? + cmp/gt r1,r2 + and r6,r4 + + mov #0,r6 + bf/s .L_denorm ! Too small + mov r6,r3 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r7,r2 +#else + SHLL23 (r2) +#endif + +! Fit the mantissa in 23 bits +.L_rnd_mant: + ! Round mantissa in r5 + mov.l .L29neg,r7 + +.L_rnd_1: + shlr r5 + rotcr r3 + rotcr r6 + + add #1,r7 + + cmp/eq r1,r7 + bf .L_rnd_1 + + shll2 r4 + mov.l .L_sign,r1 + + shll r4 + or r4,r5 + + ! Round + mov.l .L_24bit,r7 + + tst r1,r3 + bt .L_rnd_4 + + add #1,r5 + + cmp/eq r1,r3 + bf .L_rnd_3 + + tst r6,r6 + bf .L_rnd_3 + + ! Halfway between two numbers. + ! Round towards LSB=0 + shlr r5 + shll r5 + +.L_rnd_3: + ! Rounding might have produced extra MSB + tst r7,r5 + not r7,r1 + + bt .L_rnd_4 + + add r7,r2 + and r1,r5 + +.L_rnd_4: + or r5,r0 + rts + or r2,r0 + +! r0 already has zero with appropriate sign +.L_ret_zero: + rts + nop + +.L_ret_inf: + mov.l .L_inf_sf,r1 + + rts + or r1,r0 + +! 
Nan or Inf +.L_inv_exp: + mov.l .L_high_mant,r6 + cmp/hi r1,r5 + + mov.l .L_inf_sf,r3 + bt/s .L_return_NaN + and r6,r4 + + cmp/gt r1,r4 + bt .L_return_NaN + + rts + or r3,r0 + +.L_return_NaN: + mov.l .L_inf_sf,r1 + mov #1,r2 + + or r1,r0 + rts + or r2,r0 + +.L_denorm: + mov.l .L_21bit,r7 + or r7,r4 + + add #-1,r2 ! Exponent bias is 126 + +! Make the exponent zero +.L_1: + shlr r4 + rotcr r5 + + rotcr r3 + rotcr r6 + + add #1,r2 + + cmp/eq r1,r2 + bf .L_1 + + bra .L_rnd_mant + nop + +! Apply default rounding mode +.L_rnd: + clrt + addc r3,r5 + + addc r6,r4 + + add #1,r6 + cmp/eq r6,r1 + + bt .L_4 + + bra .L_rnd_mant + nop + +! Round towards LSB = 0 +.L_4: + shlr r5 + bra .L_rnd_mant + shll r5 + + .align 2 +.L_high_mant: + .long 0x000FFFFF + +.L_sign: + .long 0x80000000 + +.L_mask_sign: + .long 0x7FFFFFFF + +.L_inf_sf: + .long 0x7F800000 + +.L_inf: + .long 0x7FF00000 + +.L_max_exp: + .long 0x47E + +.L_min_exp: + .long 0x380 + +.L29neg: + .long 0xFFFFFFE3 + +.L_21bit: + .long 0x00100000 + +.L_24bit: + .long 0x00800000 + +ENDFUNC (GLOBAL (truncdfsf2)) Index: gcc/config/sh/IEEE-754/DF/unorddf2.s =================================================================== RCS file: unorddf2.s diff -N unorddf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsmYFlL9 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,113 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!checks if any of the arguments is NaN. +!Author:Rakesh Kumar +! +!Entry: +!r4,r5:operand 1 +! +!r6,r7:operand 2 +! +!Exit: +!r0:result +! +!Note:argument 1 is passed in regs r4 and r5 and argument 2 is passed in regs r6 and +!r7,the result is returned in reg r0. +! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (unorddf2) + FUNC (GLOBAL (unorddf2)) + +GLOBAL (unorddf2): +! Do not use fmov.d. It can't be used with little endian. +! Better take safer route. Also, this function will not be +! 
used very often +#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) + fmov.s fr4,@-r15 + fmov.s fr5,@-r15 + mov.l @r15+,r5 + mov.l @r15+,r4 + + fmov.s fr6,@-r15 + fmov.s fr7,@-r15 + mov.l @r15+,r7 + mov.l @r15+,r6 +#endif + +#ifdef __LITTLE_ENDIAN__ + mov r4,r1 + mov r5,r4 + mov r1,r5 + + mov r6,r1 + mov r7,r6 + mov r1,r7 +#endif + ! Extract the exponents + mov.l .L_exp,r2 + mov #0,r1 + + mov r2,r0 + mov r2,r3 + + ! Compare the exponent of a + and r4,r2 + cmp/eq r0,r2 + + and r6,r3 + bt .L_a_NaN + + ! Compare the exponent of b + cmp/eq r0,r3 + + bt .L_b_NaN + + rts + mov r1,r0 + +.L_a_NaN: + ! Extract the high bits of mantissa + mov.l .L_high_mant,r2 + cmp/gt r1,r5 + + and r4,r2 + bt .L_return ! a is NaN + + cmp/gt r1,r2 + bt .L_return ! NaN found + + ! Check b for NaN + cmp/eq r0,r3 + bt .L_b_NaN + + rts + mov r1,r0 ! Return zero + +.L_b_NaN: + ! Extract high bits of mantissa + mov.l .L_high_mant,r2 + cmp/gt r1,r7 + + and r6,r2 + bt .L_return ! b is NaN + + cmp/gt r1,r2 + bt .L_return ! NaN found + + rts + mov r1,r0 ! Return zero + +.L_return: + rts + mov #1,r0 + + .align 2 +.L_high_mant: + .long 0x000FFFFF + +.L_exp: + .long 0x7FF00000 + +ENDFUNC (GLOBAL (unorddf2)) Index: gcc/config/sh/IEEE-754/SF/addsf3.s =================================================================== RCS file: addsf3.s diff -N addsf3.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsBPByy8 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,510 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Add floating point numbers in r4, r5. + +! Author: Rakesh Kumar + +! Arguments are in r4, r5 and result in r0 + +! Entry points: ___subsf3, ___addsf3 + +! r4 and r5 are referred as op1 and op2 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (subsf3) + .global GLOBAL (addsf3) + FUNC (GLOBAL (subsf3)) + FUNC (GLOBAL (addsf3)) + +GLOBAL (subsf3): + mov.l .L_sign_bit,r1 + xor r1,r5 + +GLOBAL (addsf3): + mov.l r8,@-r15 + mov r4,r3 + + mov.l .L_pinf,r2 + mov #0,r8 + + and r2,r3 ! 
op1's exponent. + mov r5,r6 + + ! Check NaN or Infinity + and r2,r6 ! op2's exponent. + cmp/eq r2,r3 + + ! go if op1 is NaN or INF. + mov.l .L_sign_bit,r0 + bt/s .L_inv_op1 + mov #-23,r1 + + ! Go if op2 is NaN/INF. + cmp/eq r2,r6 + mov r0,r7 + bt .L_ret_op2 + +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLR23 (r3) +#else + shld r1,r3 +#endif +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLR23 (r6) +#else + shld r1,r6 +#endif + + ! Check for negative zero + cmp/eq r0,r5 + + mov r5,r1 + bt/s .L_ret_op1 + and r7,r1 + + cmp/eq r0,r4 + bt .L_ret_op2 + + ! if op1 is zero return op2 + tst r4,r4 + bt .L_ret_op2 + + ! Equal numbers with opposite sign + mov r4,r2 + xor r5,r2 + + cmp/eq r0,r2 + bt .L_ret_zero + + ! if op2 is zero return op1 + mov.l .L_mask_fra,r2 + tst r5,r5 + + ! Extract the mantissa + mov r4,r0 + bt/s .L_ret_op1 + and r2,r5 + + and r2,r4 + + mov.l .L_imp_bit,r2 + and r7,r0 ! sign bit of op1 + + ! Check for denormals + tst r3,r3 + bt .L_norm_op1 + + ! Attach the implicit bit + or r2,r4 + tst r6,r6 + + bt .L_norm_op2 + + or r2,r5 + tst r0,r0 + + ! operands are +ve or -ve?? + bt .L_ptv_op1 + + neg r4,r4 + +.L_ptv_op1: + tst r1,r1 + bt .L_ptv_op2 + + neg r5,r5 + +! Test exponents for equality +.L_ptv_op2: + cmp/eq r3,r6 + bt .L_exp_eq + +! Make exponents of two arguments equal +.L_exp_ne: + ! r0, r1 contain sign bits. + ! r4, r5 contain mantissas. + ! r3, r6 contain exponents. + ! r2, r7 scratch. + + ! Calculate result exponent. + mov r6,r2 + sub r3,r2 ! e2 - e1 + + cmp/pl r2 + mov #23,r7 + + ! e2 - e1 is -ve + bf .L_exp_ne_1 + + mov r6,r3 ! Result exp. + cmp/gt r7,r2 ! e2-e1 > 23 + + mov #1,r7 + bt .L_pack_op2_0 + + ! Align the mantissa +.L_loop_ne: + shar r4 + + rotcr r8 + cmp/eq r7,r2 + + add #-1,r2 + bf .L_loop_ne + + bt .L_exp_eq + +! Exponent difference is too high. +! Return op2 after placing pieces in proper place +.L_pack_op2_0: + ! If op1 is -ve + tst r1,r1 + bt .L_pack_op2 + + neg r5,r5 + +! 
r6 has exponent +! r5 has mantissa, r1 has sign +.L_pack_op2: + mov.l .L_nimp_bit,r2 + mov #23,r3 + + mov r1,r0 + + and r2,r5 + mov.l @r15+,r8 + + or r5,r0 + +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLL23 (r6) +#else + shld r3,r6 +#endif + rts + or r6,r0 + +! return op1. It is NAN or INF or op2 is zero. +.L_ret_op1: + mov r4,r0 + + rts + mov.l @r15+,r8 + +! return zero +.L_ret_zero: + mov #0,r0 + + rts + mov.l @r15+,r8 + +! return op2. It is NaN or INF or op1 is zero. +.L_ret_op2: + mov r5,r0 + + rts + mov.l @r15+,r8 + +! op2 is denormal. Normalize it. +.L_norm_op2: + shll r5 + add #-1,r6 + + tst r2,r5 + bt .L_norm_op2 + + ! Check sign + tst r1,r1 + bt .L_norm_op2_2 + + neg r5,r5 + +.L_norm_op2_2: + add #1,r6 + cmp/eq r3,r6 + + bf .L_exp_ne + bt .L_exp_eq + +! Normalize op1 +.L_norm_op1: + shll r4 + add #-1,r3 + + tst r2,r4 + bt .L_norm_op1 + + ! Check sign + tst r0,r0 + bt .L_norm_op1_1 + + neg r4,r4 + +.L_norm_op1_1: + ! Adjust biasing + add #1,r3 + + ! Check op2 for denormalized value + tst r6,r6 + bt .L_norm_op2 + + mov.l .L_imp_bit,r2 + + tst r1,r1 ! Check sign + or r2,r5 ! Attach 24th bit + + bt .L_norm_op1_2 + + neg r5,r5 + +.L_norm_op1_2: + cmp/eq r3,r6 + + bt .L_exp_eq + bf .L_exp_ne + +! op1 is NaN or Inf +.L_inv_op1: + ! Return op1 if it is NAN. + ! r2 is infinity + cmp/gt r2,r4 + bt .L_ret_op1 + + ! op1 is +/- INF + ! If op2 is same return now. + cmp/eq r4,r5 + bt .L_ret_op1 + + ! return op2 if it is NAN + cmp/gt r2,r5 + bt .L_ret_op2 + + ! Check if op2 is inf + cmp/eq r2,r6 + bf .L_ret_op1 + + ! Both op1 and op2 are infinities + !of opp signs, or there is -NAN. Return a NAN. + add #1,r2 + + mov.l @r15+,r8 + rts + mov r2,r0 + +! Make unequal exponents equal. +.L_exp_ne_1: + mov #-25,r7 + cmp/gt r2,r7 ! -23 > e2 - e1 + + add #1,r2 + bf .L_exp_ne_2 + + tst r0,r0 + bt .L_pack_op1 + +.L_pack_op1_0: + bra .L_pack_op1 + neg r4,r4 + +! Accumulate the shifted bits in r8 +.L_exp_ne_2: + ! 
Shift with rounding + shar r5 + rotcr r8 + + tst r2,r2 + + add #1,r2 + bf .L_exp_ne_2 + +! Exponents of op1 and op2 are equal (or made so) +! The mantissas are in r4-r5 and remaining bits in r8 +.L_exp_eq: + add r5,r4 ! Add fractions. + mov.l .L_sign_bit,r2 + + ! Check for negative result + mov #0,r0 + tst r2,r4 + + mov.l .L_255,r5 + bt .L_post_add + + neg r4,r4 + or r2,r0 + +.L_post_add: + ! Check for extra MSB + mov.l .L_chk_25,r2 + + tst r2,r4 + bt .L_imp_check + + shar r4 + rotcr r8 + + add #1,r3 + cmp/ge r5,r3 + + ! Return Inf if exp > 254 + bt .L_ret_inf + +! Check for implicit (24th) bit in result +.L_imp_check: + mov.l .L_imp_bit,r2 + tst r2,r4 + + bf .L_pack_op1 + +! Result needs left shift +.L_lft_shft: + shll r8 + rotcl r4 + + add #-1,r3 + tst r2,r4 + + bt .L_lft_shft + +! Pack the result after rounding +.L_pack_op1: + ! See if denormalized result is possible + mov.l .L_chk_25,r5 + cmp/pl r3 + + bf .L_denorm_res + + ! Are there any bits shifted previously? + tst r8,r8 + bt .L_pack_1 + + ! Round + shll r8 + movt r6 + + add r6,r4 + + ! If we are halfway between two numbers, + ! round towards LSB = 0 + tst r8,r8 + + bf .L_pack_1 + + shlr r4 + shll r4 + +.L_pack_1: + ! Adjust extra MSB generated after rounding + tst r4,r5 + mov.l .L_255,r2 + + bt .L_pack_2 + shar r4 + + add #1,r3 + cmp/ge r2,r3 ! Check for exp overflow + + bt .L_ret_inf + +! Pack it finally +.L_pack_2: + ! Do not store implicit bit + mov.l .L_nimp_bit,r2 + mov #23,r1 + + and r2,r4 + +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLL23 (r3) +#else + shld r1,r3 +#endif + mov.l @r15+,r8 + + or r4,r0 + rts + or r3,r0 + +! Return infinity +.L_ret_inf: + mov.l .L_pinf,r2 + + mov.l @r15+,r8 + rts + or r2,r0 + +! Result must be denormalized +.L_denorm_res: + mov #0,r2 + +! Denormalizing loop with rounding +.L_den_1: + shar r4 + movt r6 + + tst r3,r3 + bt .L_den_2 + + ! Increment the exponent + add #1,r3 + + tst r6,r6 + bt .L_den_0 + + ! 
Count number of ON bits shifted + add #1,r2 + +.L_den_0: + bra .L_den_1 + nop + +! Apply rounding +.L_den_2: + cmp/eq r6,r1 + bf .L_den_3 + + add r6,r4 + mov #1,r1 + + ! If halfway between two numbers, + ! round towards LSB = 0 + cmp/eq r2,r1 + bf .L_den_3 + + shar r4 + shll r4 + +.L_den_3: + + mov.l @r15+,r8 + rts + or r4,r0 + + .align 2 +.L_imp_bit: + .long 0x00800000 + +.L_nimp_bit: + .long 0xFF7FFFFF + +.L_mask_fra: + .long 0x007FFFFF + +.L_pinf: + .long 0x7F800000 + +.L_sign_bit: + .long 0x80000000 + +.L_bit_25: + .long 0x01000000 + +.L_chk_25: + .long 0x7F000000 + +.L_255: + .long 0x000000FF + +ENDFUNC (GLOBAL (addsf3)) +ENDFUNC (GLOBAL (subsf3)) Index: gcc/config/sh/IEEE-754/SF/compsf2.s =================================================================== RCS file: compsf2.s diff -N compsf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsvr41l7 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,345 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Routines for comparison two float values + +! Author: Aanchal Khanna + +! Arguments are in r4 and r5 +! Result in r0 + +! Entry points: +! ___gtsf2, ___gesf2, ___ltsf2, +! ___lesf2, ___eqsf2, ___nesf2 + +! r4 is referred as op1 +! r5 is referred as op2 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ .text + .align 5 + .global GLOBAL (gtsf2) + FUNC (GLOBAL (gtsf2)) + +GLOBAL (gtsf2): + bra GLOBAL (compsf2) + mov #-1,r7 + + .text + .align 5 + .global GLOBAL (gesf2) + FUNC (GLOBAL (gesf2)) + +GLOBAL (gesf2): + bra GLOBAL (compsf2) + mov #-1,r7 + + .text + .align 5 + .global GLOBAL (lesf2) + FUNC (GLOBAL (lesf2)) +GLOBAL (lesf2): + bra GLOBAL (compsf2) + mov #1,r7 + + .text + .align 5 + .global GLOBAL (ltsf2) + FUNC (GLOBAL (ltsf2)) + +GLOBAL (ltsf2): + bra GLOBAL (compsf2) + mov #1,r7 + + .text + .align 5 + .global GLOBAL (eqsf2) + FUNC (GLOBAL (eqsf2)) + +GLOBAL (eqsf2): + bra GLOBAL (compsf2) + mov #1,r7 + + .text + .align 5 + .global GLOBAL (nesf2) + FUNC (GLOBAL (nesf2)) +GLOBAL (nesf2): + bra GLOBAL (compsf2) + mov #1,r7 + +!compares the two arguments and returns the result as +! +!op1 < op2 return -1 +!op1 == op2 return 0 +!op1 > op2 return 1 + + .text + .align 5 + .global GLOBAL (compsf2) + FUNC (GLOBAL (compsf2)) + +GLOBAL (compsf2): + !op1=op2=0,return 0 + mov.l .L_mask_sign,r3 + tst r4,r4 + + mov.l .L_p_infinity,r1 + bt .L_op1_zero + + mov r1,r2 + cmp/eq r3,r4 + + and r4,r2 + bt .L_op1_zero + +.L_op1_not_zero: + tst r5,r5 + mov.l .L_mask_mant,r3 + + bf .L_cmp + mov.l .L_mask_sign,r6 + + !op2=0,check if op1 is NaN or Inf + cmp/eq r2,r1 + and r4,r3 + + and r4,r6 + bf .L_op1_not_nan + + tst r3,r3 + bt .L_op1_not_nan + + rts + mov r7,r0 + +.L_op1_not_nan: + mov #1,r0 + tst r6,r6 + + !op2=0, op1 +ve, return 1 else return -1 + bt .L_return + mov #-1,r0 + +.L_return: + rts + nop + +.L_op1_zero: + cmp/eq r5,r3 + mov r1,r6 + + bt .L_ret_zero + and r5,r6 + + tst r5,r5 + bt .L_ret_zero + +.L_op2_not_zero: + !Check if op2 is NaN + mov #-1,r4 + mov.l .L_mask_mant,r3 + + mov.l .L_mask_sign,r2 + cmp/eq r6,r1 + + and r5,r2 + bf .L_op2_not_nan + + and r5,r3 + !Check if op2 is Inf + tst r3,r3 + + !op2 NaN, return 1 + bt .L_op2_not_nan + mov r7,r0 + + rts + nop + +.L_op2_not_nan: + mov r4,r0 + tst r2,r2 + + !op1=0, op2 +ve, return -1 else return 1 + bt .L_return + mov 
#1,r0 + + rts + nop + +.L_cmp: + !op1 and op2 are not zero, check for Inf or NaN + cmp/eq r2,r1 + mov r1,r6 + + bf .L_chk_exp_op2 + and r4,r3 + + tst r3,r3 + bt .L_op1_inf + + rts + mov r7,r0 + +.L_op1_inf: + !op1 is Inf, check op2 for Inf or NaN + and r5,r6 + mov.l .L_mask_sign,r2 + + cmp/eq r6,r1 + mov r2,r3 + + and r4,r2 + bt/s .L_chk_mant_op2 + mov #1,r0 + + !op1 +Inf, return 1 + tst r2,r2 + !op1 -Inf, return -1 + bt .L_return + + rts + mov #-1,r0 + +.L_chk_exp_op2: + !op1 is a normal no, check op2 + and r5,r6 + mov.l .L_mask_sign,r2 + + cmp/eq r6,r1 + and r5,r3 + + bf .L_normal_nos + and r5,r2 + + tst r3,r3 + !op2 is NaN, return 1 + bt .L_op2_inf + + rts + mov r7,r0 + +.L_op2_inf: + !op2 +Inf, return -1 + tst r2,r2 + mov #-1,r0 + + !op2 -Inf, return 1 + bt .L_return + mov #1,r0 + + rts + nop + +.L_chk_mant_op2: + !op1 Inf,Check if op2 is NaN + mov.l .L_mask_mant,r6 + and r5,r6 + + tst r6,r6 + and r5,r3 + + mov r7,r0 + !B is nan,return 1 + bf .L_return + !Sign of op1 and op2 are same, op1=op2=Inf,return 0 + cmp/eq r3,r2 + + mov #0,r0 + bt .L_return + +.L_sign_not_same: + !op1 -ve return -1 else return 1 + cmp/hi r3,r2 + mov #-1,r0 + + bt .L_return + mov #1,r0 + + rts + nop + +.L_ret_zero: + rts + mov #0,r0 + +.L_normal_nos: + !op1 and op2 normal nos, compare signs + mov r2,r3 + and r4,r2 + + and r5,r3 + !Sign of op1 and op2 are same + cmp/eq r2,r3 + bf .L_sign_not_same + +.L_cmp_exps: + !If signs are same, compare exps + mov r1,r6 + and r4,r1 + + and r5,r6 + mov.l .L_mask_mant,r3 + + cmp/eq r6,r1 + bt .L_cmp_mant + + cmp/hi r6,r1 + bf .L_a_smaller + +.L_a_greater: + !If exp or mantissa of op1 is greater, check sign of op1 + tst r2,r2 + mov #1,r0 + + bt .L_return + mov #-1,r0 + + rts + nop + +.L_cmp_mant: + !If exps are equal, compare mantissas + mov r3,r1 + and r3,r4 + + and r1,r5 + cmp/eq r4,r5 + bt .L_ret_zero + + cmp/hi r4,r5 + bf .L_a_greater + +.L_a_smaller: + !If exp or mantissa of op1 is smaller, check sign of op1 + tst r2,r2 + mov #-1,r0 + + bt 
.L_return + mov #1,r0 + + rts + nop + + .align 2 + +.L_p_infinity: + .long 0x7f800000 + +.L_mask_sign: + .long 0x80000000 + +.L_minus_1: + .long 0xbf800000 + +.L_plus_1: + .long 0x3f800000 + +.L_mask_mant: + .long 0x007fffff + +ENDFUNC (GLOBAL (compsf2)) +ENDFUNC (GLOBAL (nesf2)) +ENDFUNC (GLOBAL (eqsf2)) +ENDFUNC (GLOBAL (ltsf2)) +ENDFUNC (GLOBAL (lesf2)) +ENDFUNC (GLOBAL (gesf2)) +ENDFUNC (GLOBAL (gtsf2)) Index: gcc/config/sh/IEEE-754/SF/divsf3.s =================================================================== RCS file: divsf3.s diff -N divsf3.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsbgLG95 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,381 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!divides two single precision floating point + +! Author: Aanchal Khanna + +! Arguments: Dividend is in r4, divisor in r5 +! Result: r0 + +! r4 and r5 are referred as op1 and op2 resp. + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (divsf3) + FUNC (GLOBAL (divsf3)) + +GLOBAL (divsf3): + mov.l .L_mask_sign,r1 + mov r4,r3 + + xor r5,r3 + shll r4 + + shlr r4 + mov.l .L_inf,r2 + + and r3,r1 !r1=resultant sign + mov r4,r6 + + shll r5 + mov #0,r0 + + shlr r5 + and r2,r6 + + cmp/eq r2,r6 + mov r5,r7 + + and r2,r7 + bt .L_op1_inv + + cmp/eq r2,r7 + mov #-23,r3 + + bt .L_op2_inv +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLR23 (r6) + SHLR23 (r7) +#else + shld r3,r6 + shld r3,r7 +#endif + + cmp/eq r0,r4 + + bt .L_op1_zero !dividend=0 + cmp/eq r0,r6 + + mov.l .L_imp_bit,r3 + bt .L_norm_op1 !normalize dividend +.L_chk_op2: + cmp/eq r0,r5 + bt .L_op2_zero !divisor=0 + + cmp/eq r0,r7 + bt .L_norm_op2 !normalize divisor + +.L_div1: + sub r7,r6 + add #127,r6 !r6=resultant exponent + + mov r3,r7 + mov.l .L_mask_mant,r3 + + and r3,r4 + !chk exponent for overflow + mov.l .L_255,r2 + + and r3,r5 + or r7,r4 + + cmp/ge r2,r6 + or r7,r5 + + bt .L_return_inf + mov r0,r2 + + cmp/eq r4,r5 + bf .L_den_one + + cmp/ge r6,r0 + 
!numerator=denominator, quotient=1, remainder=0 + mov r7,r2 + + mov r0,r4 + !chk exponent for underflow + bt .L_underflow + bra .L_pack + nop + +.L_den_one: + !denominator=1, result=numerator + + cmp/eq r7,r5 + bf .L_divide + + !chk exponent for underflow + cmp/ge r6,r0 + mov r4,r2 + + bt/s .L_underflow + mov r0,r4 + bra .L_pack + nop + +.L_divide: + !dividing the mantissas r4<-dividend, r5<-divisor + + cmp/hi r4,r5 + bf .L_loop + + shll r4 ! if mantissa(op1)< mantissa(op2) + add #-1,r6 ! shift left the numerator and decrease the exponent. + +.L_loop: + !division loop + + cmp/ge r5,r4 + bf .L_skip + + or r7,r2 + sub r5,r4 + +.L_skip: + shlr r7 + shll r4 + + cmp/eq r0,r7 + bf .L_loop + + !chk the exponent for underflow + cmp/ge r6,r0 + bt .L_underflow + + !apply rounding + cmp/gt r5,r4 + bt .L_round1 + + cmp/eq r4,r5 + bt .L_round2 + +.L_pack: + !pack the result, r1=sign, r2=quotient, r6=exponent + + mov #23,r4 + and r3,r2 + +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLL23 (r6) +#else + shld r4,r6 +#endif + or r2,r1 + + or r6,r1 + mov r1,r0 + + rts + nop + +.L_round1: + !Apply proper rounding + + bra .L_pack + add #1,r2 + +.L_round2: + !Apply proper rounding + + mov.l .L_comp_1,r5 + bra .L_pack + and r5,r2 + +.L_op1_inv: + !chk if op1 is Inf or NaN + + mov.l .L_mask_mant,r3 + mov r4,r6 + + and r3,r6 + cmp/hi r0,r6 + + bt .L_ret_op1 + cmp/eq r2,r7 + + bf/s .L_ret_op1 + mov r1,r0 + add #1,r4 + + rts + mov r4,r0 + +.L_op2_inv: + !chk if op2 is Inf or NaN + + mov.l .L_mask_mant,r3 + mov r5,r7 + + and r3,r7 + cmp/hi r0,r7 + + bt .L_ret_op2 + mov r1,r0 + + rts + nop + +.L_op1_zero: + !op1 is zero. 
If op2 is zero, return NaN, else return zero + + cmp/eq r0,r5 + add #1,r2 + + bf .L_ret_op1 + mov r2,r0 + + rts + nop + +.L_op2_zero: + !B is zero,return Inf + + rts + or r2,r0 + +.L_return_inf: + mov.l .L_inf,r0 + + rts + or r1,r0 + +.L_norm_op1: + !normalize dividend + + shll r4 + tst r2,r4 + + add #-1,r6 + bt .L_norm_op1 + + bra .L_chk_op2 + add #1,r6 + +.L_norm_op2: + !normalize divisor + + shll r5 + tst r2,r5 + + add #-1,r7 + bt .L_norm_op2 + + bra .L_div1 + add #1,r7 + +.L_underflow: + !denormalize the result + + add #1,r6 + mov #-24,r7 + + cmp/gt r6,r7 + mov r2,r5 + + bt .L_return_zero + add #-1,r6 + + mov #32,r3 + neg r6,r7 + + add #1,r7 +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r6,r2 +#else + cmp/ge r0,r6 + bf .L_mov_right + +.L_mov_left: + cmp/eq r0,r6 + bt .L_out + + shll r2 + bra .L_mov_left + add #-1,r6 + +.L_mov_right: + cmp/eq r0,r6 + bt .L_out + + add #1,r6 + bra .L_mov_right + shlr r2 + +.L_out: +#endif + sub r7,r3 + +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r3,r5 +#else + cmp/ge r0,r3 + bf .L_mov_right_1 + +.L_mov_left_1: + shll r5 + add #-1,r3 + + cmp/eq r0,r3 + bf .L_mov_left_1 + + bt .L_out_1 + +.L_mov_right_1: + cmp/eq r0,r3 + bt .L_out_1 + + add #1,r3 + bra .L_mov_right_1 + shlr r5 + +.L_out_1: +#endif + shlr r2 + addc r0,r2 + + cmp/eq r4,r0 !r4 contains the remainder + mov r2,r0 + + mov.l .L_mask_sign,r7 + bf .L_return + + mov.l .L_comp_1,r2 + cmp/eq r7,r5 + + bf .L_return + and r2,r0 + +.L_return: + rts + or r1,r0 + +.L_ret_op1: + rts + or r4,r0 + +.L_ret_op2: + rts + or r5,r0 + +.L_return_zero: + rts + or r1,r0 + + + + .align 2 +.L_inf: + .long 0x7f800000 +.L_mask_sign: + .long 0x80000000 +.L_mask_mant: + .long 0x007fffff +.L_imp_bit: + .long 0x00800000 +.L_comp_1: + .long 0xfffffffe +.L_255: + .long 255 + +ENDFUNC (GLOBAL (divsf3)) Index: gcc/config/sh/IEEE-754/SF/fixsfsi.s =================================================================== RCS file: fixsfsi.s diff -N 
fixsfsi.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsv8uHX4 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,139 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Conversion routine for float to integer + +! Author: Rakesh Kumar + +! Arguments: r4 (in floating point format) +! Return: r0 + +! r4 is referred as op1 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (fixsfsi) + FUNC (GLOBAL (fixsfsi)) + +GLOBAL (fixsfsi): + mov.l .L_mask_sign,r7 + mov r4,r2 + + ! Check for NaN + mov.l .L_inf,r1 + and r7,r2 + + cmp/gt r1,r2 + mov #127,r5 + + mov r4,r3 + bt/s .L_epil + mov #0,r0 + + shll r2 + mov.l .L_frac,r6 + + shlr16 r2 + and r6,r3 ! r3 has fraction + + shlr8 r2 ! r2 has exponent + mov.l .L_24bit,r1 + + ! If exponent is less than 127, return 0 + cmp/gt r2,r5 + or r1,r3 ! Set the implicit bit + + mov.l .L_157,r1 + bt/s .L_epil + shll8 r3 + + ! If exponent is greater than 157, + ! return the maximum/minumum integer + ! value deducing from sign + cmp/gt r1,r2 + sub r2,r1 + + mov.l .L_sign,r2 + bt/s .L_ret_max + add #1,r1 + + and r4,r2 ! Sign in r2 + neg r1,r1 + + ! Shift mantissa by exponent difference from 157 +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r1,r3 +#else + cmp/gt r0,r1 + bt .L_mov_left + +.L_mov_right: + cmp/eq r1,r0 + bt .L_ret + + add #1,r1 + bra .L_mov_right + + shlr r3 + +.L_mov_left: + add #-1,r1 + + shll r3 + cmp/eq r1,r0 + + bf .L_mov_left +.L_ret: +#endif + ! If op1 is negative, negate the result + cmp/eq r0,r2 + bf/s .L_negate + mov r3,r0 + +! r0 has the appropriate value +.L_epil: + rts + nop + +! Return the max/min integer value +.L_ret_max: + and r4,r2 ! Sign in r2 + mov.l .L_max,r3 + + mov.l .L_sign,r1 + cmp/eq r0,r2 + + mov r3,r0 + bt .L_epil + + ! Negative number, return min int + rts + mov r1,r0 + +! 
Negate the result +.L_negate: + rts + neg r0,r0 + + .align 2 +.L_inf: + .long 0x7F800000 + +.L_157: + .long 157 + +.L_max: + .long 0x7FFFFFFF + +.L_frac: + .long 0x007FFFFF + +.L_sign: + .long 0x80000000 + +.L_24bit: + .long 0x00800000 + +.L_mask_sign: + .long 0x7FFFFFFF + +ENDFUNC (GLOBAL (fixsfsi)) Index: gcc/config/sh/IEEE-754/SF/fixunssfsi.s =================================================================== RCS file: fixunssfsi.s diff -N fixunssfsi.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvstWANL3 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,129 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Conversion from floating point to unsigned integer + +! Author: Rakesh Kumar + +! Argument: r4 (in floating point format) +! Result: r0 + +! For negative floating point numbers, it returns zero + +! The argument is referred as op1 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (fixunssfsi) + FUNC (GLOBAL (fixunssfsi)) + +GLOBAL (fixunssfsi): + mov.l .L_sign,r0 + mov r4,r2 + + ! Check for NaN + mov.l .L_inf,r1 + and r4,r0 + + mov.l .L_mask_sign,r7 + mov #127,r5 + + ! Remove sign bit + cmp/eq #0,r0 + and r7,r2 + + ! If number is negative, return 0 + ! LIBGCC deviates from standard in this regard. + mov r4,r3 + bf/s .L_epil + mov #0,r0 + + mov.l .L_frac,r6 + cmp/gt r1,r2 + + shll r2 + bt/s .L_epil + shlr16 r2 + + shlr8 r2 ! r2 has exponent + mov.l .L_24bit,r1 + + and r6,r3 ! r3 has fraction + cmp/gt r2,r5 + + ! If exponent is less than 127, return 0 + or r1,r3 + bt .L_epil + + ! Process only if exponent is less than 158 + mov.l .L_158,r1 + shll8 r3 + + cmp/gt r1,r2 + sub r2,r1 + + neg r1,r1 + bt/s .L_ret_max + +! 
Shift the mantissa with exponent difference from 158 +#if !defined (__sh1__) && !defined (__sh2__) && !defined (__SH2E__) + shld r1,r3 +#else + cmp/gt r0,r1 + bt .L_mov_left + +.L_mov_right: + cmp/eq r1,r0 + bt .L_ret + + add #1,r1 + bra .L_mov_right + shlr r3 + +.L_mov_left: + add #-1,r1 + + shll r3 + cmp/eq r1,r0 + + bf .L_mov_left + +.L_ret: +#endif + rts + mov r3,r0 + +! r0 already has appropriate value +.L_epil: + rts + nop + +! Return the maximum unsigned integer value +.L_ret_max: + mov.l .L_max,r3 + + rts + mov r3,r0 + + .align 2 +.L_inf: + .long 0x7F800000 + +.L_158: + .long 158 + +.L_max: + .long 0xFFFFFFFF + +.L_frac: + .long 0x007FFFFF + +.L_sign: + .long 0x80000000 + +.L_24bit: + .long 0x00800000 + +.L_mask_sign: + .long 0x7FFFFFFF + +ENDFUNC (GLOBAL (fixunssfsi)) Index: gcc/config/sh/IEEE-754/SF/floatsisf.s =================================================================== RCS file: floatsisf.s diff -N floatsisf.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvs1DAYz2 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,174 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Conversion of integer to floating point + +! Author: Rakesh Kumar + +! Argument: r4 +! Result: r0 + +! r4 is referred as op1 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (floatsisf) + FUNC (GLOBAL (floatsisf)) + +GLOBAL (floatsisf): + mov.l .L_sign,r2 + mov #23,r6 + + ! Check for zero + tst r4,r4 + mov.l .L_24_bits,r7 + + ! Extract sign + and r4,r2 + bt .L_ret + + ! Negative ??? + mov.l .L_imp_bit,r5 + cmp/pl r4 + + not r7,r3 + bf .L_neg + + ! Decide the direction for shifting + cmp/gt r7,r4 + mov r4,r0 + + and r5,r0 + bt .L_shr_0 + + ! Number may already be in normalized form + cmp/eq #0,r0 + bf .L_pack + +! Shift the bits to the left. Adjust the exponent +.L_shl: + shll r4 + mov r4,r0 + + and r5,r0 + cmp/eq #0,r0 + + bt/s .L_shl + add #-1,r6 + +! Pack the value in floating point format. +! 
r6 has unbiased exponent, r4 has mantissa, r2 has sign +.L_pack: + mov #23,r3 + not r5,r5 + + mov r2,r0 + add #127,r6 + + and r5,r4 +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLL23 (r6) +#else + shld r3,r6 +#endif + + or r6,r0 + rts + or r4,r0 + +! Negate the number +.L_neg: + ! Take care for -2147483648. + mov r4,r0 + shll r0 + + cmp/eq #0,r0 + bt/s .L_ret_min + neg r4,r4 + + cmp/gt r7,r4 + bt .L_shr_0 + + mov r4,r0 + and r5,r0 + + cmp/eq #0,r0 + bf .L_pack + bt .L_shl + +.L_shr_0: + mov #0,r1 + +! Shift right the number with rounding +.L_shr: + shlr r4 + movt r7 + + tst r7,r7 + + ! Count number of ON bits shifted + bt .L_shr_1 + add #1,r1 + +.L_shr_1: + mov r4,r0 + add #1,r6 + + and r3,r0 + cmp/eq #0,r0 + + ! Add MSB of shifted bits + bf .L_shr + add r7,r4 + + tst r7,r7 + bt .L_pack + +.L_pack1: + mov #1,r0 + cmp/eq r1,r0 + + bt .L_rnd + mov r4,r0 + + ! Rounding may have misplaced MSB. Adjust. + and r3,r0 + cmp/eq #0,r0 + + bf .L_shr + bt .L_pack + +! If only MSB of shifted bits is ON, we are halfway +! between two numbers. Round towards even LSB of +! resultant mantissa. +.L_rnd: + shlr r4 + bra .L_pack + shll r4 + +.L_ret: + rts + mov r4,r0 + +! Return value for -2147483648 +.L_ret_min: + mov.l .L_min_val,r0 + rts + nop + + .align 2 +.L_sign: + .long 0x80000000 + +.L_imp_bit: + .long 0x00800000 + +.L_24_bits: + .long 0x00FFFFFF + +.L_nsign: + .long 0x7FFFFFFF + +.L_min_val: + .long 0xCF000000 + +ENDFUNC (GLOBAL (floatsisf)) Index: gcc/config/sh/IEEE-754/SF/floatunssisf.s =================================================================== RCS file: floatunssisf.s diff -N floatunssisf.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsvv8jo1 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,111 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Conversion of unsigned integer to floating point + +! Author: Rakesh Kumar + +! Argument: r4 +! Result: r0 + +! r4 is referred as op1 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + .text + .align 5 + .global GLOBAL (floatunssisf) + FUNC (GLOBAL (floatunssisf)) + +GLOBAL (floatunssisf): + tst r4,r4 + mov #23,r6 + + mov.l .L_set_24_bits,r7 + bt/s .L_return + not r7,r3 + + ! Decide the direction for shifting + mov.l .L_set_24_bit,r5 + cmp/hi r7,r4 + + not r5,r2 + bt/s .L_shift_right + mov #0,r7 + + tst r5,r4 + + mov #0,r0 + bf .L_pack_sf + +! Shift the bits to the left. Adjust the exponent +.L_shift_left: + shll r4 + tst r5,r4 + + add #-1,r6 + bt .L_shift_left + +! Pack the value in floating point format. +! r6 has unbiased exponent, r4 has mantissa +.L_pack_sf: + mov #23,r3 + add #127,r6 + + ! Align the exponent + and r2,r4 +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLL23 (r6) +#else + shld r3,r6 +#endif + + or r6,r0 + rts + or r4,r0 + +! Shift right the number with rounding +.L_shift_right: + shlr r4 + rotcr r7 + + tst r4,r3 + add #1,r6 + + bf .L_shift_right + + tst r7,r7 + bt .L_sh_rt_1 + + shll r7 + movt r1 + + add r1,r4 + + tst r7,r7 + bf .L_sh_rt_1 + + ! Halfway between two numbers. + ! Round towards LSB = 0 + shlr r4 + shll r4 + +.L_sh_rt_1: + mov r4,r0 + + ! Rounding may have misplaced MSB. Adjust. + and r3,r0 + cmp/eq #0,r0 + + bf .L_shift_right + bt .L_pack_sf + +.L_return: + rts + mov r4,r0 + + .align 2 +.L_set_24_bit: + .long 0x00800000 + +.L_set_24_bits: + .long 0x00FFFFFF + +ENDFUNC (GLOBAL (floatunssisf)) Index: gcc/config/sh/IEEE-754/SF/mulsf3.s =================================================================== RCS file: mulsf3.s diff -N mulsf3.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsfNXKc0 Wed Aug 4 19:58:21 2004 @@ -0,0 +1,322 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Routine for multiplying two floating point numbers + +! Author: Rakesh Kumar + +! Arguments: r4 and r5 +! Result: r0 + +! The arguments are referred as op1 and op2 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ + .text + .align 5 + .global GLOBAL (mulsf3) + FUNC (GLOBAL (mulsf3)) + +GLOBAL (mulsf3): + ! Extract the sign bits + mov.l .L_sign,r3 + mov r3,r0 + + and r4,r3 ! sign bit for op1 + mov.l .L_sign_mask,r6 + + ! Mask out the sign bit from op1 and op2 + and r5,r0 ! sign bit for op2 + mov.l .L_inf,r2 + + and r6,r4 + xor r3,r0 ! Final sign in r0 + + and r6,r5 + tst r4,r4 + + ! Check for zero + mov r5,r7 + bt/s .L_op1_zero ! op1 is zero + mov r4,r6 + + tst r5,r5 + bt .L_op2_zero ! op2 is zero + + ! Extract the exponents + and r2,r6 ! Exponent of op1 + cmp/eq r2,r6 + + and r2,r7 + bt .L_inv_op1 ! op1 is NaN or Inf + + mov.l .L_mant,r3 + cmp/eq r2,r7 + + and r3,r4 ! Mantissa of op1 + bt .L_ret_op2 ! op2 is Nan or Inf + + and r3,r5 ! Mantissa of op2 + + mov #-23,r3 +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLR23 (r6) + SHLR23 (r7) +#else + shld r3,r6 + shld r3,r7 +#endif + ! Check for denormals + mov.l .L_24bit,r3 + tst r6,r6 + + bt .L_norm_op1 ! op1 is denormal + add #-127,r6 ! Unbias op1's exp + + tst r7,r7 + bt .L_norm_op2 ! op2 is denormal + + add #-127,r7 ! Unbias op2's exp + +.L_multiply: + add r6,r7 ! Final exponent in r7 + mov.l .L_24bit,r1 + + ! set 24th bit of mantissas + mov #127,r3 + or r1,r4 + + ! Multiply + or r1,r5 + dmulu.l r4,r5 + + sts macl,r4 + sts mach,r5 + + mov.l .L_16bit,r6 + + ! Check for extra MSB generated + tst r5,r6 + + mov.l .L_255,r1 + bf .L_shift_by_1 ! Adjust the extra MSB + +! Normalize the result with rounding +.L_epil: + ! Bias the exponent + add #127,r7 + cmp/ge r1,r7 + + ! Check exponent overflow and underflow + bt .L_ret_inf + + cmp/pl r7 + bf .L_denorm + +.L_epil_0: + mov #-23,r3 + shll r5 + mov #0,r6 + +! Fit resultant mantissa in 24 bits +! Apply default rounding +.L_loop_epil_0: + tst r3,r3 + bt .L_loop_epil_out + + add #1,r3 + shlr r4 + + bra .L_loop_epil_0 + rotcr r6 + +! Round mantissa +.L_loop_epil_out: + shll8 r5 + or r5,r4 + + mov.l .L_mant,r2 + mov #23,r3 + + ! 
Check last bit shifted out of result + tst r6,r6 + bt .L_epil_2 + + ! Round + shll r6 + movt r5 + + add r5,r4 + + ! If this is the only ON bit shifted + ! Round towards LSB = 0 + tst r6,r6 + bf .L_epil_2 + + shlr r4 + shll r4 + +.L_epil_2: + ! Rounding may have produced extra MSB. + mov.l .L_25bit,r5 + tst r4,r5 + + bt .L_epil_1 + + add #1,r7 + shlr r4 + +.L_epil_1: +#if defined (__sh1__) || defined (__sh2__) || defined (__SH2E__) + SHLL23 (r7) +#else + shld r3,r7 +#endif + + and r2,r4 + + or r7,r4 + rts + or r4,r0 + +.L_denorm: + mov #0,r3 + +.L_den_1: + shlr r5 + rotcr r4 + + cmp/eq r3,r7 + bt .L_epil_0 + + bra .L_den_1 + add #1,r7 + + +! Normalize the first argument +.L_norm_op1: + shll r4 + tst r3,r4 + + add #-1,r6 + bt .L_norm_op1 + + ! The biasing is by 126 + add #-126,r6 + tst r7,r7 + + bt .L_norm_op2 + + bra .L_multiply + add #-127,r7 + +! Normalize the second argument +.L_norm_op2: + shll r5 + tst r3,r5 + + add #-1,r7 + bt .L_norm_op2 + + bra .L_multiply + add #-126,r7 + +! op2 is zero. Check op1 for exceptional cases +.L_op2_zero: + mov.l .L_inf,r2 + and r2,r6 + + cmp/eq r2,r6 + bf/s .L_ret_op2 ! op1 is deterministic + mov #1,r1 + + ! Return NaN + or r2,r0 + rts + or r1,r0 + +! Adjust the extra MSB +.L_shift_by_1: + shlr r5 + rotcr r4 + + add #1,r7 ! Show the shift in exponent + + cmp/gt r3,r7 + bf .L_epil + + ! The resultant exponent is invalid + mov.l .L_inf,r1 + rts + or r1,r0 + +.L_ret_op1: + rts + or r4,r0 + +! op1 is zero. Check op2 for exceptional cases +.L_op1_zero: + mov.l .L_inf,r2 + and r2,r7 + + cmp/eq r2,r7 + bf/s .L_ret_op1 ! op2 is deterministic + mov #1,r1 + + ! Return NaN + or r2,r0 + rts + or r1,r0 + +.L_inv_op1: + mov.l .L_mant,r3 + mov r4,r6 + + and r3,r6 + tst r6,r6 + + bf .L_ret_op1 ! op1 is Nan + ! op1 is not Nan. It is Inf + + cmp/eq r2,r7 + bf .L_ret_op1 ! op2 has a valid exponent + +! op2 has a invalid exponent. It could be Inf, -Inf, Nan. +! It doesn't make any difference. 
+.L_ret_op2: + rts + or r5,r0 + +.L_ret_inf: + rts + or r2,r0 + +.L_ret_zero: + mov #0,r2 + rts + or r2,r0 + + + .align 2 +.L_mant: + .long 0x007FFFFF + +.L_inf: + .long 0x7F800000 + +.L_24bit: + .long 0x00800000 + +.L_25bit: + .long 0x01000000 + +.L_16bit: + .long 0x00008000 + +.L_sign: + .long 0x80000000 + +.L_sign_mask: + .long 0x7FFFFFFF + +.L_255: + .long 0x000000FF + +ENDFUNC (GLOBAL (mulsf3)) Index: gcc/config/sh/IEEE-754/SF/negsf2.s =================================================================== RCS file: negsf2.s diff -N negsf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsptJm1Y Wed Aug 4 19:58:21 2004 @@ -0,0 +1,28 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Negating a single precision float value + +! Author: Rakesh Kumar + +! Argument: r4 +! Result: r0 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + .text + .align 5 + .global GLOBAL (negsf2) + FUNC (GLOBAL (negsf2)) + +! The floating point number is passed into register R4. +! a = exclusive or (a,0x80000000) +! return a + +GLOBAL (negsf2): + mov.l .L_sign, r0 + rts + xor r4,r0 + +.align 2 +.L_sign: + .long 0x80000000 + +ENDFUNC (GLOBAL (negsf2)) Index: gcc/config/sh/IEEE-754/SF/unordsf2.s =================================================================== RCS file: unordsf2.s diff -N unordsf2.s --- /dev/null Sat Mar 24 10:07:44 2001 +++ /tmp/cvsNnI1PX Wed Aug 4 19:58:21 2004 @@ -0,0 +1,52 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! Unordered comparison of two numbers + +! Author: Rakesh Kumar + +! Arguments: r4 and r5 +! Result: r0 +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + .text + .align 5 + .global GLOBAL (unordsf2) + FUNC (GLOBAL (unordsf2)) + +GLOBAL (unordsf2): +#if defined (__SH2E__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) + fmov.s fr4,@-r15 + fmov.s fr5,@-r15 + mov.l @r15+,r4 + mov.l @r15+,r5 +#endif + ! 
Mask out the sign bit from the two arguments