This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [patch] Move the popcnt intrinsics to a separate file
On Fri, Dec 4, 2009 at 14:41, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Dec 04, 2009 at 11:47:33AM -0600, Sebastian Pop wrote:
>> In my opinion the _mm_popcnt_u* intrinsics should be removed from the
>> smmintrin.h, as they do not deal with XMM registers, and they should
>> be put separately in a popcount.h file, as I originally proposed, and
>> then include that file in abmintrin.h and somewhere else than smmintrin.h
>> for Intel processors.
>
> BTW, isn't popcount.h too generic header name (and likely to clash with
> other package's headers)?  Wouldn't popcntintrin.h or something similar be
> better?
Ok.
In the manuals from both Intel and AMD, an identification bit is set
by cpuid on processors containing the popcnt insn. I would like to
propose the attached fixes that define __POPCNT__, include the
popcntintrin.h both in smmintrin.h and x86intrin.h, and add the cpuid
checks for ABM and LWP.
The patch set passes make -k check RUNTESTFLAGS=i386.exp
Ok for trunk after it passes bootstrap and regtest?
Thanks,
Sebastian
From 081f4ed0e17467358701fb174232413d1000c4bb Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 19:30:57 -0600
Subject: [PATCH] Fix _mm_popcnt* intrinsics.
* config.gcc (i[34567]86-*-*, x86_64-*-*): Add popcntintrin.h.
* config/i386/abmintrin.h (_mm_popcnt_u32, _mm_popcnt_u64): Moved...
* config/i386/i386-c.c (__POPCNT__): Defined.
* config/i386/popcntintrin.h: ...here. New file.
* config/i386/smmintrin.h (_mm_popcnt_u32, _mm_popcnt_u64): Moved to
popcntintrin.h.
Include popcntintrin.h when __POPCNT__ is defined.
* config/i386/x86intrin.h: Include popcntintrin.h when __POPCNT__
is defined.
---
gcc/config.gcc | 4 +-
gcc/config/i386/abmintrin.h | 15 -------------
gcc/config/i386/i386-c.c | 2 +
gcc/config/i386/popcntintrin.h | 46 ++++++++++++++++++++++++++++++++++++++++
gcc/config/i386/smmintrin.h | 15 +-----------
gcc/config/i386/x86intrin.h | 4 +++
6 files changed, 56 insertions(+), 30 deletions(-)
create mode 100644 gcc/config/i386/popcntintrin.h
diff --git a/gcc/config.gcc b/gcc/config.gcc
index f7b23aa..e6e419a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -288,7 +288,7 @@ i[34567]86-*-*)
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
- ia32intrin.h cross-stdarg.h lwpintrin.h"
+ ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -298,7 +298,7 @@ x86_64-*-*)
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
- ia32intrin.h cross-stdarg.h lwpintrin.h"
+ ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
diff --git a/gcc/config/i386/abmintrin.h b/gcc/config/i386/abmintrin.h
index b85bdb7..9d87f57 100644
--- a/gcc/config/i386/abmintrin.h
+++ b/gcc/config/i386/abmintrin.h
@@ -52,19 +52,4 @@ __lzcnt64 (unsigned long __X)
}
#endif
-/* Calculate a number of bits set to 1. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u32 (unsigned int __X)
-{
- return __builtin_popcount (__X);
-}
-
-#ifdef __x86_64__
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u64 (unsigned long long __X)
-{
- return __builtin_popcountll (__X);
-}
-#endif
-
#endif /* _ABMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index cba9ceb..35eab49 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -238,6 +238,8 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__LWP__");
if (isa_flag & OPTION_MASK_ISA_ABM)
def_or_undef (parse_in, "__ABM__");
+ if (isa_flag & OPTION_MASK_ISA_POPCNT)
+ def_or_undef (parse_in, "__POPCNT__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h
new file mode 100644
index 0000000..8d4d657
--- /dev/null
+++ b/gcc/config/i386/popcntintrin.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __POPCNT__
+# error "POPCNT instruction set not enabled"
+#endif /* __POPCNT__ */
+
+#ifndef _POPCNTINTRIN_H_INCLUDED
+#define _POPCNTINTRIN_H_INCLUDED
+
+/* Calculate a number of bits set to 1. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u32 (unsigned int __X)
+{
+ return __builtin_popcount (__X);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u64 (unsigned long long __X)
+{
+ return __builtin_popcountll (__X);
+}
+#endif
+
+#endif /* _POPCNTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index 8fbb35c..170fae5 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -793,19 +793,8 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
}
-/* Calculate a number of bits set to 1. */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u32 (unsigned int __X)
-{
- return __builtin_popcount (__X);
-}
-
-#ifdef __x86_64__
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_popcnt_u64 (unsigned long long __X)
-{
- return __builtin_popcountll (__X);
-}
+#ifdef __POPCNT__
+#include <popcntintrin.h>
#endif
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 63252bf..29d44dc 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -81,4 +81,8 @@
#include <abmintrin.h>
#endif
+#ifdef __POPCNT__
+#include <popcntintrin.h>
+#endif
+
#endif /* _X86INTRIN_H_INCLUDED */
--
1.6.0.4
From 1854f41b37c8eca1d10c016bdb2e8afa94f9ac73 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 20:06:33 -0600
Subject: [PATCH] Fix ABM.
2009-12-04 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.c (bdesc_special_args): Move __builtin_clzs from
this array ...
(bdesc_args): ... here.
* config.gcc (i[34567]86-*-*): Include abmintrin.h.
(x86_64-*-*): Likewise.
* gcc.target/i386/sse-12.c: Add -mabm to dg-options, mention
abmintrin.h is also tested.
* gcc.target/i386/sse-13.c: Likewise.
---
gcc/config.gcc | 6 ++++--
gcc/config/i386/i386.c | 4 ++--
gcc/testsuite/gcc.target/i386/sse-12.c | 7 ++++---
gcc/testsuite/gcc.target/i386/sse-13.c | 11 ++++++-----
4 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc
index e6e419a..c9620cc 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -288,7 +288,8 @@ i[34567]86-*-*)
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
- ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
+ ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
+ abmintrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -298,7 +299,8 @@ x86_64-*-*)
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h xopintrin.h
- ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h"
+ ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
+ abmintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7cafdf6..0e58a17 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21547,8 +21547,6 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
- { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
-
};
/* Builtins with variable number of arguments. */
@@ -22173,6 +22171,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
};
/* FMA4 and XOP. */
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index d03c41b..4a314e8 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -1,7 +1,8 @@
-/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h and mm_malloc.h are
- usable with -O -std=c89 -pedantic-errors. */
+/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h,
+ abmintrin.h and mm_malloc.h are usable with -O -std=c89
+ -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mabm" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 2ef63d5..546a99f 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,12 +1,13 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mabm" } */
#include <mm_malloc.h>
-/* Test that the intrinsics compile with optimization. All of them are
- defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h and mm3dnow.h
- that reference the proper builtin functions. Defining away "extern" and
- "__inline" results in all of them being compiled as proper functions. */
+/* Test that the intrinsics compile with optimization. All of them
+ are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
+ xopintrin.h, abmintrin.h and mm3dnow.h that reference the proper
+ builtin functions. Defining away "extern" and "__inline" results
+ in all of them being compiled as proper functions. */
#define extern
#define __inline
--
1.6.0.4
From 0fa24ef4e1abc41aa5430ac7b12efd72c26af9df Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 19:18:47 -0600
Subject: [PATCH] Check cpuid ABM bit.
* config/i386/cpuid.h (bit_ABM): New.
* config/i386/driver-i386.c (host_detect_local_cpu): Add -mabm to the
options when bit_ABM is set.
---
gcc/config/i386/cpuid.h | 5 +++--
gcc/config/i386/driver-i386.c | 5 ++++-
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 21f0e31..3228414 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -46,11 +46,12 @@
/* Extended Features */
/* %ecx */
-#define bit_FMA4 (1 << 16)
#define bit_LAHF_LM (1 << 0)
-#define bit_LWP (1 << 15)
+#define bit_ABM (1 << 5)
#define bit_SSE4a (1 << 6)
#define bit_XOP (1 << 11)
+#define bit_LWP (1 << 15)
+#define bit_FMA4 (1 << 16)
/* %edx */
#define bit_LM (1 << 29)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index df0689d..d3088f3 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -383,7 +383,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
- unsigned int has_pclmul = 0;
+ unsigned int has_pclmul = 0, has_abm = 0;
bool arch;
@@ -444,6 +444,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_lahf_lm = ecx & bit_LAHF_LM;
has_sse4a = ecx & bit_SSE4a;
+ has_abm = ecx & bit_ABM;
has_longmode = edx & bit_LM;
has_3dnowp = edx & bit_3DNOWP;
@@ -622,6 +623,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
options = concat (options, " -mpclmul", NULL);
if (has_popcnt)
options = concat (options, " -mpopcnt", NULL);
+ if (has_abm)
+ options = concat (options, " -mabm", NULL);
if (has_avx)
options = concat (options, " -mavx", NULL);
--
1.6.0.4
From 69ea94ac7ff0ec58a44b5ee184494f0a8a2edd7f Mon Sep 17 00:00:00 2001
From: Sebastian Pop <sebpop@gmail.com>
Date: Sat, 5 Dec 2009 19:20:55 -0600
Subject: [PATCH] Check cpuid LWP bit.
* config/i386/driver-i386.c (host_detect_local_cpu): Add -mlwp to the
options when bit_LWP is set.
---
gcc/config/i386/driver-i386.c | 5 ++++-
1 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index d3088f3..05ba01e 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -383,7 +383,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
- unsigned int has_pclmul = 0, has_abm = 0;
+ unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
bool arch;
@@ -445,6 +445,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_lahf_lm = ecx & bit_LAHF_LM;
has_sse4a = ecx & bit_SSE4a;
has_abm = ecx & bit_ABM;
+ has_lwp = ecx & bit_LWP;
has_longmode = edx & bit_LM;
has_3dnowp = edx & bit_3DNOWP;
@@ -625,6 +626,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
options = concat (options, " -mpopcnt", NULL);
if (has_abm)
options = concat (options, " -mabm", NULL);
+ if (has_lwp)
+ options = concat (options, " -mlwp", NULL);
if (has_avx)
options = concat (options, " -mavx", NULL);
--
1.6.0.4