This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: Enable FTZ/DAZ for SSE via fast math
On Wed, Aug 10, 2005 at 08:13:17AM -0700, H. J. Lu wrote:
> On Wed, Aug 10, 2005 at 10:18:41AM -0400, Jakub Jelinek wrote:
> > On Wed, Aug 10, 2005 at 07:09:04AM -0700, H. J. Lu wrote:
> > > On Tue, Aug 09, 2005 at 02:58:51PM -0700, Richard Henderson wrote:
> > > > On Tue, Aug 09, 2005 at 02:30:46PM -0700, H. J. Lu wrote:
> > > > > There is a minor problem. How can I add crtfastmath.o for SSE targets
> > > > > only?
> > > >
> > > > You don't. You either add code to detect sse, or you make the
> > > > spec depend on -mfpmath=sse.
> > > >
> > >
> > > Here is the patch to enable FTZ/DAZ for SSE via fast math. There are
> > > no regressions on Linux/x86_64 nor Linux/ia32. The performance of one
> > > FP benchmark on EM64T is more than doubled with -ffast-math.
> >
> > Not all i?86 CPUs support cpuid instruction.
> > Please look at
> > gcc/testsuite/gcc.dg/i386-cpuid.h
> > for the ugly details.
> >
>
> Ok. Also, all x86_64 processors support SSE, so there is no need to
> check for it in 64-bit mode.
>
>
Here is the updated patch.
H.J.
---
2005-08-10 H.J. Lu <hongjiu.lu@intel.com>
* config.gcc (i[34567]86-*-linux*): Add i386/t-crtfm to tmake_file.
(x86_64-*-linux*): Likewise.
* config/i386/crtfastmath.c: New file.
* config/i386/t-crtfm: Likewise.
* config/i386/linux.h (ENDFILE_SPEC): New.
* config/i386/linux64.h (ENDFILE_SPEC): Likewise.
* config/i386/t-linux64 (EXTRA_MULTILIB_PARTS): Add
crtfastmath.o.
--- gcc/config.gcc.sse 2005-08-09 16:18:14.000000000 -0700
+++ gcc/config.gcc 2005-08-09 16:18:15.000000000 -0700
@@ -1006,7 +1006,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfree
i[34567]86-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h i386/knetbsd-gnu.h" ;;
i[34567]86-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h i386/kfreebsd-gnu.h" ;;
esac
- tmake_file="${tmake_file} i386/t-crtstuff"
+ tmake_file="${tmake_file} i386/t-crtstuff i386/t-crtfm"
;;
x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu)
tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h svr4.h linux.h \
@@ -1015,7 +1015,7 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu
x86_64-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h" ;;
x86_64-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h" ;;
esac
- tmake_file="${tmake_file} i386/t-linux64"
+ tmake_file="${tmake_file} i386/t-linux64 i386/t-crtfm"
;;
i[34567]86-*-gnu*)
;;
--- gcc/config/i386/crtfastmath.c.sse 2005-08-09 16:18:15.000000000 -0700
+++ gcc/config/i386/crtfastmath.c 2005-08-10 08:47:24.207266245 -0700
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2005 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file. (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * As a special exception, if you link this library with files
+ * compiled with GCC to produce an executable, this does not cause
+ * the resulting executable to be covered by the GNU General Public License.
+ * This exception does not however invalidate any other reasons why
+ * the executable file might be covered by the GNU General Public License.
+ */
+
+#define MXCSR_DAZ (1 << 6) /* MXCSR bit 6: enable denormals-are-zero mode */
+#define MXCSR_FTZ (1 << 15) /* MXCSR bit 15: enable flush-to-zero mode */
+/* Constructor: set the DAZ and FTZ bits in MXCSR (32bit: only if SSE is detected). */
+static void __attribute__((constructor))
+set_fast_math (void)
+{
+#ifndef __x86_64__
+ /* SSE is always available in 64bit mode; only 32bit needs a runtime check. */
+ unsigned int eax, ebx, ecx, edx;
+
+ /* See if we can use cpuid: try to flip EFLAGS bit 21 and read it back. */
+ asm volatile ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
+ "pushl %0; popfl; pushfl; popl %0; popfl"
+ : "=&r" (eax), "=&r" (ebx)
+ : "i" (0x00200000));
+
+ if (((eax ^ ebx) & 0x00200000) == 0) /* bit did not change: no cpuid */
+ return;
+
+ /* cpuid with eax = 0: check the highest input value for eax. */
+ asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+ : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+ : "0" (0));
+
+ if (eax == 0) /* cpuid input value 1 is not available */
+ return;
+
+ asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+ : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
+ : "0" (1)); /* cpuid with eax = 1; only edx is examined below */
+
+ if (edx & (1 << 25)) /* edx bit 25 gates the MXCSR update (SSE check) */
+#endif
+ {
+ unsigned int mxcsr = __builtin_ia32_stmxcsr (); /* read the current MXCSR */
+ mxcsr |= MXCSR_DAZ | MXCSR_FTZ; /* NOTE(review): confirm DAZ exists on every SSE CPU */
+ __builtin_ia32_ldmxcsr (mxcsr);
+ }
+}
--- gcc/config/i386/linux.h.sse 2004-11-28 17:04:42.000000000 -0800
+++ gcc/config/i386/linux.h 2005-08-09 16:18:15.000000000 -0700
@@ -121,6 +121,12 @@ Boston, MA 02111-1307, USA. */
%{!dynamic-linker:-dynamic-linker %(dynamic_linker)}} \
%{static:-static}}}"
+/* Similar to standard Linux, but link crtfastmath.o for -ffast-math or -funsafe-math-optimizations. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
/* A C statement (sans semicolon) to output to the stdio stream
FILE the assembler definition of uninitialized global DECL named
NAME whose size is SIZE bytes and alignment is ALIGN bytes.
--- gcc/config/i386/linux64.h.sse 2004-11-28 17:04:42.000000000 -0800
+++ gcc/config/i386/linux64.h 2005-08-09 16:18:15.000000000 -0700
@@ -64,6 +64,12 @@ Boston, MA 02111-1307, USA. */
%{!m32:%{!dynamic-linker:-dynamic-linker /lib64/ld-linux-x86-64.so.2}}} \
%{static:-static}}"
+/* Similar to standard Linux, but link crtfastmath.o for -ffast-math or -funsafe-math-optimizations. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
#define MULTILIB_DEFAULTS { "m64" }
#undef NEED_INDICATE_EXEC_STACK
--- gcc/config/i386/t-crtfm.sse 2005-08-09 16:18:15.000000000 -0700
+++ gcc/config/i386/t-crtfm 2005-08-09 16:18:15.000000000 -0700
@@ -0,0 +1,6 @@
+EXTRA_PARTS += crtfastmath.o
+
+$(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \
+ $(srcdir)/config/i386/crtfastmath.c \
+ -o $(T)crtfastmath$(objext)
--- gcc/config/i386/t-linux64.sse 2003-03-03 12:03:59.000000000 -0800
+++ gcc/config/i386/t-linux64 2005-08-09 16:18:15.000000000 -0700
@@ -11,7 +11,8 @@ MULTILIB_OSDIRNAMES = ../lib64 ../lib
LIBGCC = stmp-multilib
INSTALL_LIBGCC = install-multilib
-EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \
+ crtbeginT.o crtfastmath.o
# The pushl in CTOR initialization interferes with frame pointer elimination.
# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,