This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, i386]: Implement -mcmpxchg16b and -msahf options


Jan Hubicka wrote:
+@item -mcmpxchg16b
+@opindex -mcmpxchg16b
+This option will enable GCC to use CMPXCHG16B instruction in generated code.

Perhaps a little extra information can be put here. The average user looking for flags to speed up his application will have no idea what CMPXCHG16B is used for, and thus we can save him the time spent looking for docs or doing unnecessary benchmarking to see if his 3D FPS now runs cooler.

Jan, thanks for pointing me to the Wikipedia reference. I have added the text from the Wikipedia entry to both items, with a cross-reference to Atomic Builtins.


BTW: As Jakub pointed out, I agree that -mcx16 is a better name for the -mcmpxchg16b option.

A test was also added to check the -mcx16 flag.

-/* true if cmpxchg16b is supported. */
-int x86_cmpxchg16b;
-
-/* true if sahf is supported. Early Intel CPUs with Intel 64
- lacked LAHF and SAHF instructions supported by AMD64 until
- introduction of Pentium 4 G1 step in December 2005. */
-int x86_sahf;
-

Are there any, except for historic, reasons why those are not declared
bool?
Actually, these variables are now generated directly from the i386.opt "Var" statement. Please note that we have already run out of target flags, so this is now the only choice for new compile flags.

I'll commit the following patch revision to SVN mainline:

2007-03-13 Uros Bizjak <ubizjak@gmail.com>

       * config/i386/i386.opt (mcx16, msahf): New options.
       * config/i386/i386.c (x86_cmpxchg16b, x86_sahf): Remove.
       (ix86_tune_features) [X86_TUNE_USE_SAHF]: Enable for m_GENERIC.

       * config/i386/driver-i386.c (bit_LAHF_LM): New define.
       (host_detect_local_cpu): Detect cx16 and lahf_lm cpuid bits.
       Output -mcx16 and -msahf options when corresponding bit is set.

       * doc/invoke.texi (i386 and x86-64 Options): Document -mcx16
       and -msahf options.

testsuite/ChangeLog:

* testsuite/gcc.target/i386/cmpxchg16b-1.c: New test.

Uros.

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 122881)
+++ doc/invoke.texi	(working copy)
@@ -543,7 +543,7 @@
 -masm=@var{dialect}  -mno-fancy-math-387 @gol
 -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
 -mno-wide-multiply  -mrtd  -malign-double @gol
--mpreferred-stack-boundary=@var{num} @gol
+-mpreferred-stack-boundary=@var{num} -mcx16 -msahf @gol
 -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
 -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
@@ -10081,6 +10081,22 @@
 the file containing the CPU detection code should be compiled without
 these options.
 
+@item -mcx16
+@opindex -mcx16
+This option will enable GCC to use CMPXCHG16B instruction in generated code.
+CMPXCHG16B allows for atomic operations on 128-bit double quadword (or oword)
+data types.  This is useful for high resolution counters that could be updated
+by multiple processors (or cores).  This instruction is generated as part of
+atomic built-in functions: see @ref{Atomic Builtins} for details.
+
+@item -msahf
+@opindex -msahf
+This option will enable GCC to use SAHF instruction in generated code.  Early
+Intel CPUs with Intel 64 lacked LAHF and SAHF instructions supported by AMD64
+until introduction of Pentium 4 G1 step in December 2005.  LAHF and SAHF are
+load and store instructions, respectively, for certain status flags.  These
+instructions are used for virtualization and floating-point condition handling.
+
 @item -mpush-args
 @itemx -mno-push-args
 @opindex mpush-args
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 122881)
+++ ChangeLog	(working copy)
@@ -1,3 +1,16 @@
+2007-03-13  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/i386/i386.opt (mcx16, msahf): New options.
+	* config/i386/i386.c (x86_cmpxchg16b, x86_sahf): Remove.
+	(ix86_tune_features) [X86_TUNE_USE_SAHF]: Enable for m_GENERIC.
+
+	* config/i386/driver-i386.c (bit_LAHF_LM): New define.
+	(host_detect_local_cpu): Detect cx16 and lahf_lm cpuid bits.
+	Output -mcx16 and -msahf options when corresponding bit is set.
+
+	* doc/invoke.texi (i386 and x86-64 Options): Document -mcx16
+	and -msahf options.
+
 2007-03-13  Alexandre Oliva  <aoliva@redhat.com>
 
 	* configure.ac: Test for assembler tolerance to # 0 "".
Index: testsuite/gcc.target/i386/cmpxchg16b-1.c
===================================================================
--- testsuite/gcc.target/i386/cmpxchg16b-1.c	(revision 0)
+++ testsuite/gcc.target/i386/cmpxchg16b-1.c	(revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options "-O2 -mcx16" } */
+
+typedef int TItype __attribute__ ((mode (TI)));
+
+TItype m_128;
+
+void test(TItype x_128)
+{
+  m_128 = __sync_val_compare_and_swap (&m_128, x_128, m_128);
+}
+
+/* { dg-final { scan-assembler "cmpxchg16b" } } */
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 122881)
+++ config/i386/i386.h	(working copy)
@@ -322,17 +322,14 @@
 #define TARGET_XADD		ix86_arch_features[X86_ARCH_XADD]
 #define TARGET_BSWAP		ix86_arch_features[X86_ARCH_BSWAP]
 
+#define TARGET_CMPXCHG16B	x86_cmpxchg16b
+#define TARGET_SAHF		x86_sahf
+
 #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
 
 extern int x86_prefetch_sse;
 #define TARGET_PREFETCH_SSE	x86_prefetch_sse
 
-extern int x86_cmpxchg16b;
-#define TARGET_CMPXCHG16B	x86_cmpxchg16b
-
-extern int x86_sahf;
-#define TARGET_SAHF		x86_sahf
-
 #define ASSEMBLER_DIALECT	(ix86_asm_dialect)
 
 #define TARGET_SSE_MATH		((ix86_fpmath & FPMATH_SSE) != 0)
Index: config/i386/i386.opt
===================================================================
--- config/i386/i386.opt	(revision 122881)
+++ config/i386/i386.opt	(working copy)
@@ -221,6 +221,14 @@
 namely __builtin_popcount, __builtin_popcountl, __builtin_popcountll and
 __builtin_clz, __builtin_clzl, __builtin_clzll
 
+mcx16
+Target Report RejectNegative Var(x86_cmpxchg16b)
+Support code generation of cmpxchg16b instruction.
+
+msahf
+Target Report RejectNegative Var(x86_sahf)
+Support code generation of sahf instruction in 64bit x86-64 code
+
 msseregparm
 Target RejectNegative Mask(SSEREGPARM)
 Use SSE register passing conventions for SF and DF mode
Index: config/i386/driver-i386.c
===================================================================
--- config/i386/driver-i386.c	(revision 122881)
+++ config/i386/driver-i386.c	(working copy)
@@ -43,6 +43,7 @@
 #define bit_SSE4a (1 << 6)
 #define bit_CMPXCHG16B (1 << 13)
 
+#define bit_LAHF_LM (1 << 0)
 #define bit_3DNOW (1 << 31)
 #define bit_3DNOWP (1 << 30)
 #define bit_LM (1 << 29)
@@ -188,6 +189,7 @@
 {
   const char *cpu = NULL;
   const char *cache = "";
+  const char *options = "";
   enum processor_type processor = PROCESSOR_I386;
   unsigned int eax, ebx, ecx, edx;
   unsigned int max_level;
@@ -195,6 +197,7 @@
   unsigned int ext_level;
   unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0;
   unsigned char has_sse2 = 0, has_sse3 = 0, has_ssse3 = 0, has_cmov = 0;
+  unsigned char has_cmpxchg16b = 0, has_lahf_lm = 0;
   unsigned char has_longmode = 0, has_cmpxchg8b = 0, has_sse4a = 0;
   unsigned char is_amd = 0;
   unsigned int family = 0;
@@ -236,6 +239,7 @@
   has_sse2 = !!(edx & bit_SSE2);
   has_sse3 = !!(ecx & bit_SSE3);
   has_ssse3 = !!(ecx & bit_SSSE3);
+  has_cmpxchg16b = !!(ecx & bit_CMPXCHG16B);
   /* We don't care for extended family.  */
   family = (eax >> 8) & ~(1 << 4);
 
@@ -244,6 +248,7 @@
   if (ext_level >= 0x80000000)
     {
       cpuid (0x80000001, eax, ebx, ecx, edx);
+      has_lahf_lm = !!(ecx & bit_LAHF_LM);
       has_3dnow = !!(edx & bit_3DNOW);
       has_3dnowp = !!(edx & bit_3DNOWP);
       has_longmode = !!(edx & bit_LM);
@@ -416,8 +421,16 @@
       break;
     }
 
+  if (arch)
+    {
+      if (has_cmpxchg16b)
+	options = concat (options, "-mcx16 ", NULL);
+      if (has_lahf_lm)
+	options = concat (options, "-msahf ", NULL);
+    }
+
 done:
-  return concat (cache, "-m", argv[0], "=", cpu, NULL);
+  return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL);
 }
 #else
 /* If we aren't compiling with GCC we just provide a minimal
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 122881)
+++ config/i386/i386.c	(working copy)
@@ -1040,7 +1040,7 @@
   
   /* X86_TUNE_USE_SAHF */
   m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
-  | m_NOCONA | m_CORE2 | m_GENERIC32,
+  | m_NOCONA | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
      partial dependencies.  */
@@ -1438,14 +1438,6 @@
 /* true if sse prefetch instruction is not NOOP.  */
 int x86_prefetch_sse;
 
-/* true if cmpxchg16b is supported.  */
-int x86_cmpxchg16b;
-
-/* true if sahf is supported. Early Intel CPUs with Intel 64
-   lacked LAHF and SAHF instructions supported by AMD64 until
-   introduction of Pentium 4 G1 step in December 2005.  */
-int x86_sahf;
-
 /* ix86_regparm_string as a number */
 static int ix86_regparm;
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]