This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch] Move the popcnt intrinsics to a separate file


On Fri, Dec 4, 2009 at 14:41, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Dec 04, 2009 at 11:47:33AM -0600, Sebastian Pop wrote:
>> In my opinion the _mm_popcnt_u* intrinsics should be removed from the
>> smmintrin.h, as they do not deal with XMM registers, and they should
>> be put separately in a popcount.h file, as I originally proposed, and
>> then include that file in abmintrin.h and somewhere else than smmintrin.h
>> for Intel processors.
>
> BTW, isn't popcount.h too generic header name (and likely to clash with
> other package's headers)?  Wouldn't popcntintrin.h or something similar be
> better?

Ok.

In the manuals from both Intel and AMD, an identification bit is set
by cpuid on processors containing the popcnt insn.  I would like to
propose the attached fixes that define __POPCNT__, include the
popcntintrin.h both in smmintrin.h and x86intrin.h, and add the cpuid
checks for ABM and LWP.

The patch set passes make -k check RUNTESTFLAGS=i386.exp
Ok for trunk after it passes bootstrap and regtest?

Thanks,
Sebastian
From 081f4ed0e17467358701fb174232413d1000c4bb Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 19:30:57 -0600
Subject: [PATCH] Fix _mm_popcnt* intrinsics.

	* config.gcc (i[34567]86-*-*, x86_64-*-*): Add popcntintrin.h.
	* config/i386/abmintrin.h (_mm_popcnt_u32, _mm_popcnt_u64): Moved...
	* config/i386/i386-c.c (__POPCNT__): Defined.
	* config/i386/popcntintrin.h: ...here.  New file.
	* config/i386/smmintrin.h (_mm_popcnt_u32, _mm_popcnt_u64): Moved to
	popcntintrin.h.  Include popcntintrin.h when __POPCNT__ is defined.
	* config/i386/x86intrin.h: Include popcntintrin.h when __POPCNT__
	is defined.
---
 gcc/config.gcc                 |    4 +-
 gcc/config/i386/abmintrin.h    |   15 -------------
 gcc/config/i386/i386-c.c       |    2 +
 gcc/config/i386/popcntintrin.h |   46 ++++++++++++++++++++++++++++++++++++++++
 gcc/config/i386/smmintrin.h    |   15 +-----------
 gcc/config/i386/x86intrin.h    |    4 +++
 6 files changed, 56 insertions(+), 30 deletions(-)
 create mode 100644 gcc/config/i386/popcntintrin.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index f7b23aa..e6e419a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -288,7 +288,7 @@ i[34567]86-*-*)
 		       pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
-		       ia32intrin.h cross-stdarg.h lwpintrin.h"
+		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -298,7 +298,7 @@ x86_64-*-*)
 		       pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
-		       ia32intrin.h cross-stdarg.h lwpintrin.h"
+		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/abmintrin.h b/gcc/config/i386/abmintrin.h
index b85bdb7..9d87f57 100644
--- a/gcc/config/i386/abmintrin.h
+++ b/gcc/config/i386/abmintrin.h
@@ -52,19 +52,4 @@ __lzcnt64 (unsigned long __X)
 }
 #endif
 
-/* Calculate a number of bits set to 1.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u32 (unsigned int __X)
-{
-  return __builtin_popcount (__X);
-}
-
-#ifdef __x86_64__
-extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u64 (unsigned long long __X)
-{
-  return __builtin_popcountll (__X);
-}
-#endif
-
 #endif /* _ABMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index cba9ceb..35eab49 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -238,6 +238,8 @@ ix86_target_macros_internal (int isa_flag,
     def_or_undef (parse_in, "__LWP__");
   if (isa_flag & OPTION_MASK_ISA_ABM)
     def_or_undef (parse_in, "__ABM__");
+  if (isa_flag & OPTION_MASK_ISA_POPCNT)
+    def_or_undef (parse_in, "__POPCNT__");
   if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
     def_or_undef (parse_in, "__SSE_MATH__");
   if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h
new file mode 100644
index 0000000..8d4d657
--- /dev/null
+++ b/gcc/config/i386/popcntintrin.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __POPCNT__
+# error "POPCNT instruction set not enabled"
+#endif /* __POPCNT__ */
+
+#ifndef _POPCNTINTRIN_H_INCLUDED
+#define _POPCNTINTRIN_H_INCLUDED
+
+/* Calculate a number of bits set to 1.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u32 (unsigned int __X)
+{
+  return __builtin_popcount (__X);
+}
+
+#ifdef __x86_64__
+extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u64 (unsigned long long __X)
+{
+  return __builtin_popcountll (__X);
+}
+#endif
+
+#endif /* _POPCNTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index 8fbb35c..170fae5 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -793,19 +793,8 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
 }
 
-/* Calculate a number of bits set to 1.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u32 (unsigned int __X)
-{
-  return __builtin_popcount (__X);
-}
-
-#ifdef __x86_64__
-extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u64 (unsigned long long __X)
-{
-  return __builtin_popcountll (__X);
-}
+#ifdef __POPCNT__
+#include <popcntintrin.h>
 #endif
 
 /* Accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 63252bf..29d44dc 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -81,4 +81,8 @@
 #include <abmintrin.h>
 #endif
 
+#ifdef __POPCNT__
+#include <popcntintrin.h>
+#endif
+
 #endif /* _X86INTRIN_H_INCLUDED */
-- 
1.6.0.4

From 1854f41b37c8eca1d10c016bdb2e8afa94f9ac73 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 20:06:33 -0600
Subject: [PATCH] Fix ABM.

2009-12-04  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (bdesc_special_args): Move __builtin_clzs from
	this array ...
	(bdesc_args): ... here.
	* config.gcc (i[34567]86-*-*): Include abmintrin.h.
	(x86_64-*-*): Likewise.

	* gcc.target/i386/sse-12.c: Add -mabm to dg-options, mention
	abmintrin.h is also tested.
	* gcc.target/i386/sse-13.c: Likewise.
---
 gcc/config.gcc                         |    6 ++++--
 gcc/config/i386/i386.c                 |    4 ++--
 gcc/testsuite/gcc.target/i386/sse-12.c |    7 ++++---
 gcc/testsuite/gcc.target/i386/sse-13.c |   11 ++++++-----
 4 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index e6e419a..c9620cc 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -288,7 +288,8 @@ i[34567]86-*-*)
 		       pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
-		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
+		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
+		       abmintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -298,7 +299,8 @@ x86_64-*-*)
 		       pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
-		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
+		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
+		       abmintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7cafdf6..0e58a17 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21547,8 +21547,6 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3,   "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS64,  UNKNOWN,     (int) UCHAR_FTYPE_UINT_UINT_UINT },
   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3,   "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64,  UNKNOWN,     (int) UCHAR_FTYPE_UINT64_UINT_UINT },
 
-  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
-
 };
 
 /* Builtins with variable number of arguments.  */
@@ -22173,6 +22171,8 @@ static const struct builtin_description bdesc_args[] =
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF  },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
+
+  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
 };
 
 /* FMA4 and XOP.  */
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index d03c41b..4a314e8 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -1,7 +1,8 @@
-/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h and mm_malloc.h are
-   usable with -O -std=c89 -pedantic-errors.  */
+/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h,
+   abmintrin.h and mm_malloc.h are usable with -O -std=c89
+   -pedantic-errors.  */
 /* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mabm" } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 2ef63d5..546a99f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,12 +1,13 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mabm" } */
 
 #include <mm_malloc.h>
 
-/* Test that the intrinsics compile with optimization.  All of them are
-   defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h and mm3dnow.h
-   that reference the proper builtin functions.  Defining away "extern" and
-   "__inline" results in all of them being compiled as proper functions.  */
+/* Test that the intrinsics compile with optimization.  All of them
+   are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
+   xopintrin.h, abmintrin.h and mm3dnow.h that reference the proper
+   builtin functions.  Defining away "extern" and "__inline" results
+   in all of them being compiled as proper functions.  */
 
 #define extern
 #define __inline
-- 
1.6.0.4

From 0fa24ef4e1abc41aa5430ac7b12efd72c26af9df Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 19:18:47 -0600
Subject: [PATCH] Check cpuid ABM bit.

	* config/i386/cpuid.h (bit_ABM): New.
	* config/i386/driver-i386.c (host_detect_local_cpu): Add -mabm to
	the options when bit_ABM is set.
---
 gcc/config/i386/cpuid.h       |    5 +++--
 gcc/config/i386/driver-i386.c |    5 ++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 21f0e31..3228414 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -46,11 +46,12 @@
 
 /* Extended Features */
 /* %ecx */
-#define bit_FMA4        (1 << 16) 
 #define bit_LAHF_LM	(1 << 0)
-#define bit_LWP 	(1 << 15)
+#define bit_ABM		(1 << 5)
 #define bit_SSE4a	(1 << 6)
 #define bit_XOP         (1 << 11)
+#define bit_LWP 	(1 << 15)
+#define bit_FMA4        (1 << 16)
 
 /* %edx */
 #define bit_LM		(1 << 29)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index df0689d..d3088f3 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -383,7 +383,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
   unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
-  unsigned int has_pclmul = 0;
+  unsigned int has_pclmul = 0, has_abm = 0;
 
   bool arch;
 
@@ -444,6 +444,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 
       has_lahf_lm = ecx & bit_LAHF_LM;
       has_sse4a = ecx & bit_SSE4a;
+      has_abm = ecx & bit_ABM;
 
       has_longmode = edx & bit_LM;
       has_3dnowp = edx & bit_3DNOWP;
@@ -622,6 +623,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 	options = concat (options, " -mpclmul", NULL);
       if (has_popcnt)
 	options = concat (options, " -mpopcnt", NULL);
+      if (has_abm)
+	options = concat (options, " -mabm", NULL);
 
       if (has_avx)
 	options = concat (options, " -mavx", NULL);
-- 
1.6.0.4

From 69ea94ac7ff0ec58a44b5ee184494f0a8a2edd7f Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 19:20:55 -0600
Subject: [PATCH] Check cpuid LWP bit.

	* config/i386/driver-i386.c (host_detect_local_cpu): Add -mlwp to
	the options when bit_LWP is set.
---
 gcc/config/i386/driver-i386.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index d3088f3..05ba01e 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -383,7 +383,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
   unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
-  unsigned int has_pclmul = 0, has_abm = 0;
+  unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
 
   bool arch;
 
@@ -445,6 +445,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       has_lahf_lm = ecx & bit_LAHF_LM;
       has_sse4a = ecx & bit_SSE4a;
       has_abm = ecx & bit_ABM;
+      has_lwp = ecx & bit_LWP;
 
       has_longmode = edx & bit_LM;
       has_3dnowp = edx & bit_3DNOWP;
@@ -625,6 +626,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 	options = concat (options, " -mpopcnt", NULL);
       if (has_abm)
 	options = concat (options, " -mabm", NULL);
+      if (has_lwp)
+	options = concat (options, " -mlwp", NULL);
 
       if (has_avx)
 	options = concat (options, " -mavx", NULL);
-- 
1.6.0.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]