This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] i386: Update the default -mzeroupper setting
- From: "H.J. Lu" <hjl dot tools at gmail dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>, Sebastian Peryt <sebastian dot peryt at intel dot com>
- Date: Wed, 15 Nov 2017 08:59:26 -0800
- Subject: Re: [PATCH] i386: Update the default -mzeroupper setting
- Authentication-results: sourceware.org; auth=none
- References: <20171115133701.GA15187@gmail.com> <CAFULd4bRdAV4DsQujB_N-qu34pGy90ri+UZuZcq03Ou8z6AcEQ@mail.gmail.com>
On Wed, Nov 15, 2017 at 8:09 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Wed, Nov 15, 2017 at 2:37 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> -mzeroupper is specified to generate vzeroupper instruction. If it
>> isn't used, the default should depend on !TARGET_AVX512ER. Users can
>> always use -mzeroupper or -mno-zeroupper to override it.
>>
>> Sebastian, can you run the full test with it?
>>
>> OK for trunk if there is no regression?
>
> If we want to go this way, please add relevant tune flag (e.g.
> X86_TUNE_EMIT_VZEROUPPER) and use it for ~m_KNL. This tune is the
> property of the processor model, not ISA.
How about this? OK for trunk if there are no regressions?
--
H.J.
From d9388c1b7f36e2310645aed4a4debefa65b5129e Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 14 Nov 2017 20:49:33 -0800
Subject: [PATCH] i386: Add X86_TUNE_EMIT_VZEROUPPER
Add X86_TUNE_EMIT_VZEROUPPER to indicate if vzeroupper instruction should
be inserted before a transfer of control flow out of the function. It is
turned on by default unless we are tuning for KNL. Users can always use
-mzeroupper or -mno-zeroupper to override X86_TUNE_EMIT_VZEROUPPER.
gcc/
PR target/82990
* config/i386/i386.c (pass_insert_vzeroupper::gate): Remove
TARGET_AVX512ER check.
(ix86_option_override_internal): Set MASK_VZEROUPPER if
neither -mzeroupper nor -mno-zeroupper is used and
TARGET_EMIT_VZEROUPPER is set.
* config/i386/i386.h (TARGET_EMIT_VZEROUPPER): New.
* config/i386/x86-tune.def: Add X86_TUNE_EMIT_VZEROUPPER.
gcc/testsuite/
PR target/82990
* gcc.target/i386/pr82942-2.c: Add -mtune=knl.
* gcc.target/i386/pr82990-1.c: New test.
* gcc.target/i386/pr82990-2.c: Likewise.
* gcc.target/i386/pr82990-3.c: Likewise.
* gcc.target/i386/pr82990-4.c: Likewise.
* gcc.target/i386/pr82990-5.c: Likewise.
* gcc.target/i386/pr82990-6.c: Likewise.
* gcc.target/i386/pr82990-7.c: Likewise.
---
gcc/config/i386/i386.c | 5 +++--
gcc/config/i386/i386.h | 2 ++
gcc/config/i386/x86-tune.def | 4 ++++
gcc/testsuite/gcc.target/i386/pr82942-2.c | 2 +-
gcc/testsuite/gcc.target/i386/pr82990-1.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr82990-2.c | 6 ++++++
gcc/testsuite/gcc.target/i386/pr82990-3.c | 6 ++++++
gcc/testsuite/gcc.target/i386/pr82990-4.c | 6 ++++++
gcc/testsuite/gcc.target/i386/pr82990-5.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/pr82990-6.c | 6 ++++++
gcc/testsuite/gcc.target/i386/pr82990-7.c | 6 ++++++
11 files changed, 68 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr82990-7.c
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c5e84a09954..c6ca0712755 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2497,7 +2497,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- return TARGET_AVX && !TARGET_AVX512ER
+ return TARGET_AVX
&& TARGET_VZEROUPPER && flag_expensive_optimizations
&& !optimize_size;
}
@@ -4666,7 +4666,8 @@ ix86_option_override_internal (bool main_args_p,
if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
- if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
+ if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
+ && TARGET_EMIT_VZEROUPPER)
opts->x_target_flags |= MASK_VZEROUPPER;
if (!(opts_set->x_target_flags & MASK_STV))
opts->x_target_flags |= MASK_STV;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e3e55da4232..a45e2df5783 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -517,6 +517,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
#define TARGET_ONE_IF_CONV_INSN \
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_EMIT_VZEROUPPER \
+ ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 99282c88341..19fd2b52b30 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -543,3 +543,7 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0U)
arithmetic to 32bit via PROMOTE_MODE macro. This code generation scheme
is usually used for RISC targets. */
DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U)
+
+/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion
+ before a transfer of control flow out of the function. */
+DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL)
diff --git a/gcc/testsuite/gcc.target/i386/pr82942-2.c b/gcc/testsuite/gcc.target/i386/pr82942-2.c
index cb0e337113e..ddb4e689659 100644
--- a/gcc/testsuite/gcc.target/i386/pr82942-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr82942-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mavx512f -mavx512er -O2" } */
+/* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */
#include "pr82941-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c
new file mode 100644
index 00000000000..ff1d6d40eb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=knl -mvzeroupper" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+ z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-2.c b/gcc/testsuite/gcc.target/i386/pr82990-2.c
new file mode 100644
index 00000000000..0d3cb2333dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c
new file mode 100644
index 00000000000..201fa98d8d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-4.c b/gcc/testsuite/gcc.target/i386/pr82990-4.c
new file mode 100644
index 00000000000..09f161c7291
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-4.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512er -mno-vzeroupper -O2" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-5.c b/gcc/testsuite/gcc.target/i386/pr82990-5.c
new file mode 100644
index 00000000000..9932bdc5375
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mtune=generic" } */
+
+#include <immintrin.h>
+
+extern __m512d y, z;
+
+void
+pr82941 ()
+{
+ z = y;
+}
+
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-6.c b/gcc/testsuite/gcc.target/i386/pr82990-6.c
new file mode 100644
index 00000000000..063a61c111d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-6.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=knl" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82990-7.c b/gcc/testsuite/gcc.target/i386/pr82990-7.c
new file mode 100644
index 00000000000..dedde8b854b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82990-7.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512 -mtune=generic -mtune-ctrl=^emit_vzeroupper" } */
+
+#include "pr82941-1.c"
+
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
--
2.14.3