This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
PATCH: Enable FTZ/DAZ for SSE via fast math
On Tue, Aug 09, 2005 at 02:58:51PM -0700, Richard Henderson wrote:
> On Tue, Aug 09, 2005 at 02:30:46PM -0700, H. J. Lu wrote:
> > There is a minor problem. How can I add crtfastmath.o for SSE targets
> > only?
>
> You don't. You either add code to detect sse, or you make the
> spec depend on -mfpmath=sse.
>
Here is the patch to enable FTZ/DAZ for SSE via fast math. There are
no regressions on either Linux/x86_64 or Linux/ia32. The performance of one
FP benchmark on EM64T more than doubles with -ffast-math.
H.J.
---
2005-08-09 H.J. Lu <hongjiu.lu@intel.com>
* config.gcc (i[34567]86-*-linux*): Add i386/t-crtfm to tmake_file.
(x86_64-*-linux*): Likewise.
* config/i386/crtfastmath.c: New file.
* config/i386/t-crtfm: Likewise.
* config/i386/linux.h (ENDFILE_SPEC): New.
* config/i386/linux64.h (ENDFILE_SPEC): Likewise.
* config/i386/t-linux64 (EXTRA_MULTILIB_PARTS): Add
crtfastmath.o.
--- gcc/config.gcc.sse 2005-08-06 07:22:06.000000000 -0700
+++ gcc/config.gcc 2005-08-09 15:09:16.313927259 -0700
@@ -1001,7 +1001,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfree
i[34567]86-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h i386/knetbsd-gnu.h" ;;
i[34567]86-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h i386/kfreebsd-gnu.h" ;;
esac
- tmake_file="${tmake_file} i386/t-crtstuff"
+ tmake_file="${tmake_file} i386/t-crtstuff i386/t-crtfm"
;;
x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu)
tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h svr4.h linux.h \
@@ -1010,7 +1010,7 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu
x86_64-*-kfreebsd*-gnu) tm_file="${tm_file} kfreebsd-gnu.h" ;;
x86_64-*-knetbsd*-gnu) tm_file="${tm_file} knetbsd-gnu.h" ;;
esac
- tmake_file="${tmake_file} i386/t-linux64"
+ tmake_file="${tmake_file} i386/t-linux64 i386/t-crtfm"
;;
i[34567]86-*-gnu*)
;;
--- gcc/config/i386/crtfastmath.c.sse 2005-08-09 15:09:39.634095529 -0700
+++ gcc/config/i386/crtfastmath.c 2005-08-09 15:29:01.796141023 -0700
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2005 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file. (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * As a special exception, if you link this library with files
+ * compiled with GCC to produce an executable, this does not cause
+ * the resulting executable to be covered by the GNU General Public License.
+ * This exception does not however invalidate any other reasons why
+ * the executable file might be covered by the GNU General Public License.
+ */
+
+#define MXCSR_DAZ (1 << 6) /* MXCSR bit 6: denormals-are-zero (DAZ) mode */
+#define MXCSR_FTZ (1 << 15) /* MXCSR bit 15: flush-to-zero (FTZ) mode */
+/* set_fast_math: constructor (run automatically at startup) that sets DAZ and FTZ in MXCSR when CPUID reports SSE. */
+static void __attribute__((constructor))
+set_fast_math (void)
+{
+ /* Check if SSE is available: run CPUID with EAX = 1 and inspect EDX. */
+ unsigned int eax, ebx, ecx, edx;
+ asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" /* swap %ebx out and back so it holds its old value after cpuid; presumably because %ebx is the ia32 PIC register. NOTE(review): xchgl is a 32-bit exchange -- confirm %rbx survives on x86_64. */
+ : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx) /* CPUID's EBX result ends up in operand 1, not in %ebx */
+ : "0" (1)); /* input: EAX = 1 selects the CPUID leaf queried here */
+ /* Bit 25 of EDX is the flag tested for SSE; MXCSR is left untouched when it is clear. */
+ if (edx & (1 << 25))
+ {
+ unsigned int mxcsr = __builtin_ia32_stmxcsr (); /* read the current MXCSR value */
+ mxcsr |= MXCSR_DAZ | MXCSR_FTZ; /* keep existing bits, add DAZ and FTZ. NOTE(review): DAZ may not exist on every SSE CPU -- confirm whether the FXSAVE MXCSR_MASK must be checked first. */
+ __builtin_ia32_ldmxcsr (mxcsr); /* write the updated value back to MXCSR */
+ }
+}
--- gcc/config/i386/linux.h.sse 2004-11-28 17:04:42.000000000 -0800
+++ gcc/config/i386/linux.h 2005-08-09 14:22:44.554244342 -0700
@@ -121,6 +121,12 @@ Boston, MA 02111-1307, USA. */
%{!dynamic-linker:-dynamic-linker %(dynamic_linker)}} \
%{static:-static}}}"
+/* Similar to standard Linux, but also link crtfastmath.o when -ffast-math or -funsafe-math-optimizations is given. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
/* A C statement (sans semicolon) to output to the stdio stream
FILE the assembler definition of uninitialized global DECL named
NAME whose size is SIZE bytes and alignment is ALIGN bytes.
--- gcc/config/i386/linux64.h.sse 2004-11-28 17:04:42.000000000 -0800
+++ gcc/config/i386/linux64.h 2005-08-09 15:46:50.138601985 -0700
@@ -64,6 +64,12 @@ Boston, MA 02111-1307, USA. */
%{!m32:%{!dynamic-linker:-dynamic-linker /lib64/ld-linux-x86-64.so.2}}} \
%{static:-static}}"
+/* Similar to standard Linux, but also link crtfastmath.o when -ffast-math or -funsafe-math-optimizations is given. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
#define MULTILIB_DEFAULTS { "m64" }
#undef NEED_INDICATE_EXEC_STACK
--- gcc/config/i386/t-crtfm.sse 2005-08-09 15:09:33.236146774 -0700
+++ gcc/config/i386/t-crtfm 2005-08-09 15:53:19.144246878 -0700
@@ -0,0 +1,6 @@
+EXTRA_PARTS += crtfastmath.o
+
+$(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -msse -c \
+ $(srcdir)/config/i386/crtfastmath.c \
+ -o $(T)crtfastmath$(objext)
--- gcc/config/i386/t-linux64.sse 2003-03-03 12:03:59.000000000 -0800
+++ gcc/config/i386/t-linux64 2005-08-09 15:36:22.796680353 -0700
@@ -11,7 +11,8 @@ MULTILIB_OSDIRNAMES = ../lib64 ../lib
LIBGCC = stmp-multilib
INSTALL_LIBGCC = install-multilib
-EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \
+ crtbeginT.o crtfastmath.o
# The pushl in CTOR initialization interferes with frame pointer elimination.
# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,