This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, i386]: Implement -mcmpxchg16b and -msahf options


H. J. Lu wrote:

-march=nocona shouldn't use sahf. However, -march=native should
check the lahf_lm bit.
The attached patch implements the processor-specific compile options -mcmpxchg16b and -msahf, which instruct the compiler to generate these instructions in asm code. The compiler driver is further enhanced to detect the availability of these instructions through a cpuid query in order to set the appropriate compiler flags for -march=native compilations.

Due to this change, the X86_TUNE_USE_SAHF flag can also be set for m_GENERIC64. sahf generation itself is disabled by default, but can be enabled by -msahf - either manually or via -march=native. When enabled, it should be used, as it results in smaller but equally fast code on all 64bit processors. The same logic applies to m_NOCONA (as reported by Jan): sahf generation is disabled by default for -march=nocona, but if -march=native detects availability of the instruction, it enables it via -msahf.

As far as the implementation of compile option handling is concerned, I would like to point out that we have run out of target mask flags for i386, so "normal" variables have to be used. Also, the options are marked as RejectNegative, as they only override the default setting [false].


2007-03-08 Uros Bizjak <ubizjak@gmail.com>


       * config/i386/i386.opt (mcmpxchg16b, msahf): New options.
       * config/i386/i386.c (x86_cmpxchg16b, x86_sahf): Remove.
       (ix86_tune_features) [X86_TUNE_USE_SAHF]: Enable for m_GENERIC.

       * config/i386/driver-i386.c (bit_LAHF_LM): New define.
       (host_detect_local_cpu): Detect cmpxchg16b and lahf_lm cpuid
       bits.  Output -mcmpxchg16b and -msahf options when corresponding
       bit is set.

       * doc/invoke.texi (i386 and x86-64 Options): Document -mcmpxchg16b
       and -msahf options.

Patch was bootstrapped on x86_64 (core2) using "-O2 -march=native" and regression tested for all default languages. OK for mainline?

Uros.


Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 122703)
+++ doc/invoke.texi	(working copy)
@@ -541,7 +541,7 @@
 -masm=@var{dialect}  -mno-fancy-math-387 @gol
 -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
 -mno-wide-multiply  -mrtd  -malign-double @gol
--mpreferred-stack-boundary=@var{num} @gol
+-mpreferred-stack-boundary=@var{num} -mcmpxchg16b -msahf @gol
 -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
 -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
@@ -10026,6 +10026,16 @@
 the file containing the CPU detection code should be compiled without
 these options.
 
+@item -mcmpxchg16b
+@opindex mcmpxchg16b
+This option will enable GCC to use the CMPXCHG16B instruction in generated code.
+
+@item -msahf
+@opindex msahf
+This option will enable GCC to use the SAHF instruction in generated code.  Early
+Intel CPUs with Intel 64 lacked the LAHF and SAHF instructions supported by AMD64
+until the introduction of the Pentium 4 G1 step in December 2005.
+
 @item -mpush-args
 @itemx -mno-push-args
 @opindex mpush-args
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 122705)
+++ config/i386/i386.h	(working copy)
@@ -322,17 +322,14 @@
 #define TARGET_XADD		ix86_arch_features[X86_ARCH_XADD]
 #define TARGET_BSWAP		ix86_arch_features[X86_ARCH_BSWAP]
 
+#define TARGET_CMPXCHG16B	x86_cmpxchg16b
+#define TARGET_SAHF		x86_sahf
+
 #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
 
 extern int x86_prefetch_sse;
 #define TARGET_PREFETCH_SSE	x86_prefetch_sse
 
-extern int x86_cmpxchg16b;
-#define TARGET_CMPXCHG16B	x86_cmpxchg16b
-
-extern int x86_sahf;
-#define TARGET_SAHF		x86_sahf
-
 #define ASSEMBLER_DIALECT	(ix86_asm_dialect)
 
 #define TARGET_SSE_MATH		((ix86_fpmath & FPMATH_SSE) != 0)
Index: config/i386/i386.opt
===================================================================
--- config/i386/i386.opt	(revision 122703)
+++ config/i386/i386.opt	(working copy)
@@ -221,6 +221,14 @@
 namely __builtin_popcount, __builtin_popcountl, __builtin_popcountll and
 __builtin_clz, __builtin_clzl, __builtin_clzll
 
+mcmpxchg16b
+Target Report RejectNegative Var(x86_cmpxchg16b)
+Support code generation of cmpxchg16b instruction.
+
+msahf
+Target Report RejectNegative Var(x86_sahf)
+Support code generation of sahf instruction in 64bit x86-64 code
+
 msseregparm
 Target RejectNegative Mask(SSEREGPARM)
 Use SSE register passing conventions for SF and DF mode
Index: config/i386/driver-i386.c
===================================================================
--- config/i386/driver-i386.c	(revision 122703)
+++ config/i386/driver-i386.c	(working copy)
@@ -43,6 +43,7 @@
 #define bit_SSE4a (1 << 6)
 #define bit_CMPXCHG16B (1 << 13)
 
+#define bit_LAHF_LM (1 << 0)
 #define bit_3DNOW (1 << 31)
 #define bit_3DNOWP (1 << 30)
 #define bit_LM (1 << 29)
@@ -188,6 +189,7 @@
 {
   const char *cpu = NULL;
   const char *cache = "";
+  const char *options = "";
   enum processor_type processor = PROCESSOR_I386;
   unsigned int eax, ebx, ecx, edx;
   unsigned int max_level;
@@ -195,6 +197,7 @@
   unsigned int ext_level;
   unsigned char has_mmx = 0, has_3dnow = 0, has_3dnowp = 0, has_sse = 0;
   unsigned char has_sse2 = 0, has_sse3 = 0, has_ssse3 = 0, has_cmov = 0;
+  unsigned char has_cmpxchg16b = 0, has_lahf_lm = 0;
   unsigned char has_longmode = 0, has_cmpxchg8b = 0, has_sse4a = 0;
   unsigned char is_amd = 0;
   unsigned int family = 0;
@@ -236,6 +239,7 @@
   has_sse2 = !!(edx & bit_SSE2);
   has_sse3 = !!(ecx & bit_SSE3);
   has_ssse3 = !!(ecx & bit_SSSE3);
+  has_cmpxchg16b = !!(ecx & bit_CMPXCHG16B);
   /* We don't care for extended family.  */
   family = (eax >> 8) & ~(1 << 4);
 
@@ -244,6 +248,7 @@
   if (ext_level >= 0x80000000)
     {
       cpuid (0x80000001, eax, ebx, ecx, edx);
+      has_lahf_lm = !!(ecx & bit_LAHF_LM);
       has_3dnow = !!(edx & bit_3DNOW);
       has_3dnowp = !!(edx & bit_3DNOWP);
       has_longmode = !!(edx & bit_LM);
@@ -416,8 +421,13 @@
       break;
     }
 
+  if (has_cmpxchg16b)
+    options = concat (options, "-mcmpxchg16b ", NULL);
+  if (has_lahf_lm)
+    options = concat (options, "-msahf ", NULL);
+
 done:
-  return concat (cache, "-m", argv[0], "=", cpu, NULL);
+  return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL);
 }
 #else
 /* If we aren't compiling with GCC we just provide a minimal
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 122705)
+++ config/i386/i386.c	(working copy)
@@ -1040,7 +1040,7 @@
   
   /* X86_TUNE_USE_SAHF */
   m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
-  | m_NOCONA | m_CORE2 | m_GENERIC32,
+  | m_NOCONA | m_CORE2 | m_GENERIC,
 
   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
      partial dependencies.  */
@@ -1438,14 +1438,6 @@
 /* true if sse prefetch instruction is not NOOP.  */
 int x86_prefetch_sse;
 
-/* true if cmpxchg16b is supported.  */
-int x86_cmpxchg16b;
-
-/* true if sahf is supported. Early Intel CPUs with Intel 64
-   lacked LAHF and SAHF instructions supported by AMD64 until
-   introduction of Pentium 4 G1 step in December 2005.  */
-int x86_sahf;
-
 /* ix86_regparm_string as a number */
 static int ix86_regparm;
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]