This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, i386] Add new arg values for __builtin_cpu_supports
- From: Ilya Tocar <tocarip dot intel at gmail dot com>
- To: gcc-patches <gcc-patches at gcc dot gnu dot org>
- Cc: Jeff Law <law at redhat dot com>, Jakub Jelinek <jakub at redhat dot com>, enkovich dot gnu at gmail dot com
- Date: Wed, 26 Nov 2014 18:35:57 +0300
- Subject: Re: [PATCH, i386] Add new arg values for __builtin_cpu_supports
- Authentication-results: sourceware.org; auth=none
- References: <20141120163603 dot GA30483 at msticlxl57 dot ims dot intel dot com> <20141120164054 dot GS1745 at tucnak dot redhat dot com> <546F7A4F dot 8050800 at redhat dot com> <CAMbmDYZVy0KJAT-RTbuHJRqUiVLsp7coSOheuaBj2R+jjYu_1Q at mail dot gmail dot com>
> >> I think using cpuid for that is just fine. __builtin_cpu_supports
> >> is for ISA additions users might actually want to version code for,
> >> MPX stuff, as the instructions are nops without hw support, are not
> >> something one would multi-version a function for.
> >> If anything, AVX512F and AVX512BW+VL might be good candidates for that,
> >> not
> >> MPX.
> >
> > > Sorry, I didn't know that __builtin_cpu_supports was really only meant for
> > > user multi-versioning. In that case, it won't make any sense to put the MPX
> > > stuff in there.
> >
> > Sorry for sending you down a wrong path Ilya.
>
> It's OK, AVX guys will just transform this MPX patch into AVX512 one :)
>
Hi,
I've added avx512f support to __builtin_cpu_supports.
I'm not sure about bw+vl; I think for compound values like
avx512bw+dq+vl, arch is better. Also, for such cases the priority is unclear:
which should we choose, bw+vl or e.g. avx512f+er?
I've left the MPX bits in cpuid.h in case we need them later (e.g.
for enabling runtime MPX tests).
Ok for trunk?
gcc/
* config/i386/cpuid.h (bit_MPX, bit_BNDREGS, bit_BNDCSR):
Define.
* config/i386/i386.c (get_builtin_code_for_version): Add avx512f.
(fold_builtin_cpu): Ditto.
* doc/extend.texi: Document it.
gcc/testsuite/
* g++.dg/ext/mv2.C: Add test for target ("avx512f").
* gcc.target/i386/builtin_target.c: Ditto.
libgcc/
* config/i386/cpuinfo.c (processor_features): Add FEATURE_AVX512F.
* config/i386/cpuinfo.c (get_available_features): Detect it.
---
gcc/config/i386/cpuid.h | 5 +++
gcc/config/i386/i386.c | 10 +++--
gcc/doc/extend.texi | 2 +
gcc/testsuite/g++.dg/ext/mv2.C | 51 +++++++++++++++-----------
gcc/testsuite/gcc.target/i386/builtin_target.c | 4 ++
libgcc/config/i386/cpuinfo.c | 5 ++-
6 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 6c6e7f3..e3b7646 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -72,6 +72,7 @@
#define bit_AVX2 (1 << 5)
#define bit_BMI2 (1 << 8)
#define bit_RTM (1 << 11)
+#define bit_MPX (1 << 14)
#define bit_AVX512F (1 << 16)
#define bit_AVX512DQ (1 << 17)
#define bit_RDSEED (1 << 18)
@@ -91,6 +92,10 @@
#define bit_PREFETCHWT1 (1 << 0)
#define bit_AVX512VBMI (1 << 1)
+/* XFEATURE_ENABLED_MASK register bits (%eax == 13, %ecx == 0) */
+#define bit_BNDREGS (1 << 3)
+#define bit_BNDCSR (1 << 4)
+
/* Extended State Enumeration Sub-leaf (%eax == 13, %ecx == 1) */
#define bit_XSAVEOPT (1 << 0)
#define bit_XSAVEC (1 << 1)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index eafc15a..2493130 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -34235,7 +34235,8 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
P_FMA,
P_PROC_FMA,
P_AVX2,
- P_PROC_AVX2
+ P_PROC_AVX2,
+ P_AVX512F
};
enum feature_priority priority = P_ZERO;
@@ -34263,7 +34264,8 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
{"fma4", P_FMA4},
{"xop", P_XOP},
{"fma", P_FMA},
- {"avx2", P_AVX2}
+ {"avx2", P_AVX2},
+ {"avx512f", P_AVX512F}
};
@@ -35238,6 +35240,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
F_FMA4,
F_XOP,
F_FMA,
+ F_AVX512F,
F_MAX
};
@@ -35326,7 +35329,8 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"fma4", F_FMA4},
{"xop", F_XOP},
{"fma", F_FMA},
- {"avx2", F_AVX2}
+ {"avx2", F_AVX2},
+ {"avx512f",F_AVX512F}
};
tree __processor_model_type = build_processor_model_struct ();
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7178c9a..773e14c 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -11642,6 +11642,8 @@ SSE4.2 instructions.
AVX instructions.
@item avx2
AVX2 instructions.
+@item avx512f
+AVX512F instructions.
@end table
Here is an example:
diff --git a/gcc/testsuite/g++.dg/ext/mv2.C b/gcc/testsuite/g++.dg/ext/mv2.C
index 869e99b..d4f1f92 100644
--- a/gcc/testsuite/g++.dg/ext/mv2.C
+++ b/gcc/testsuite/g++.dg/ext/mv2.C
@@ -20,31 +20,34 @@ int foo () __attribute__ ((target ("sse4.2")));
int foo () __attribute__ ((target ("popcnt")));
int foo () __attribute__ ((target ("avx")));
int foo () __attribute__ ((target ("avx2")));
+int foo () __attribute__ ((target ("avx512f")));
int main ()
{
int val = foo ();
- if (__builtin_cpu_supports ("avx2"))
- assert (val == 1);
+ if (__builtin_cpu_supports ("avx512f"))
+ assert (val == 11);
+ else if (__builtin_cpu_supports ("avx2"))
+ assert (val == 10);
else if (__builtin_cpu_supports ("avx"))
- assert (val == 2);
+ assert (val == 9);
else if (__builtin_cpu_supports ("popcnt"))
- assert (val == 3);
+ assert (val == 8);
else if (__builtin_cpu_supports ("sse4.2"))
- assert (val == 4);
+ assert (val == 7);
else if (__builtin_cpu_supports ("sse4.1"))
- assert (val == 5);
- else if (__builtin_cpu_supports ("ssse3"))
assert (val == 6);
+ else if (__builtin_cpu_supports ("ssse3"))
+ assert (val == 5);
else if (__builtin_cpu_supports ("sse3"))
- assert (val == 7);
+ assert (val == 4);
else if (__builtin_cpu_supports ("sse2"))
- assert (val == 8);
+ assert (val == 3);
else if (__builtin_cpu_supports ("sse"))
- assert (val == 9);
+ assert (val == 2);
else if (__builtin_cpu_supports ("mmx"))
- assert (val == 10);
+ assert (val == 1);
else
assert (val == 0);
@@ -60,59 +63,65 @@ foo ()
int __attribute__ ((target("mmx")))
foo ()
{
- return 10;
+ return 1;
}
int __attribute__ ((target("sse")))
foo ()
{
- return 9;
+ return 2;
}
int __attribute__ ((target("sse2")))
foo ()
{
- return 8;
+ return 3;
}
int __attribute__ ((target("sse3")))
foo ()
{
- return 7;
+ return 4;
}
int __attribute__ ((target("ssse3")))
foo ()
{
- return 6;
+ return 5;
}
int __attribute__ ((target("sse4.1")))
foo ()
{
- return 5;
+ return 6;
}
int __attribute__ ((target("sse4.2")))
foo ()
{
- return 4;
+ return 7;
}
int __attribute__ ((target("popcnt")))
foo ()
{
- return 3;
+ return 8;
}
int __attribute__ ((target("avx")))
foo ()
{
- return 2;
+ return 9;
}
int __attribute__ ((target("avx2")))
foo ()
{
- return 1;
+ return 10;
+}
+
+int __attribute__ ((target("avx512f")))
+foo ()
+{
+ return 11;
}
diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
index c40983e..b6a3eee 100644
--- a/gcc/testsuite/gcc.target/i386/builtin_target.c
+++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
@@ -145,6 +145,8 @@ check_features (unsigned int ecx, unsigned int edx,
__cpuid_count (7, 0, eax, ebx, ecx, edx);
if (ebx & bit_AVX2)
assert (__builtin_cpu_supports ("avx2"));
+ if (ebx & bit_AVX512F)
+ assert (__builtin_cpu_supports ("avx512f"));
}
}
@@ -241,6 +243,8 @@ quick_check ()
assert (__builtin_cpu_supports ("avx2") >= 0);
+ assert (__builtin_cpu_supports ("avx512f") >= 0);
+
/* Check CPU type. */
assert (__builtin_cpu_is ("amd") >= 0);
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c
index 6ff7502..8ca385c 100644
--- a/libgcc/config/i386/cpuinfo.c
+++ b/libgcc/config/i386/cpuinfo.c
@@ -96,7 +96,8 @@ enum processor_features
FEATURE_SSE4_A,
FEATURE_FMA4,
FEATURE_XOP,
- FEATURE_FMA
+ FEATURE_FMA,
+ FEATURE_AVX512F
};
struct __processor_model
@@ -278,6 +279,8 @@ get_available_features (unsigned int ecx, unsigned int edx,
__cpuid_count (7, 0, eax, ebx, ecx, edx);
if (ebx & bit_AVX2)
features |= (1 << FEATURE_AVX2);
+ if (ebx & bit_AVX512F)
+ features |= (1 << FEATURE_AVX512F);
}
unsigned int ext_level;
--
1.8.3.1