[PATCH] i386: Remove duplicated AVX2/AVX512 vec_dup patterns
H.J. Lu
hongjiu.lu@intel.com
Fri Nov 2 17:25:00 GMT 2018
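Remove the duplicated AVX2/AVX512 vec_dup patterns and use the remaining
vec_dup patterns instead, taking a subreg of the source operand where needed.
The code generated for gcc.target/i386/avx2-vbroadcastss_ps256-1.c changes as follows: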
avx2_test:
.cfi_startproc
- vmovaps x(%rip), %xmm1
- vbroadcastss %xmm1, %ymm0
+ vbroadcastss x(%rip), %ymm0
vmovaps %ymm0, y(%rip)
vzeroupper
ret
.cfi_endproc
The code generated for gcc.target/i386/avx512vl-vbroadcast-3.c changes as follows:
@@ -113,7 +113,7 @@ f10:
.cfi_startproc
vmovaps %ymm0, %ymm16
vpermilps $85, %ymm16, %ymm16
- vbroadcastss %xmm16, %ymm16
+ vshuff32x4 $0x0, %ymm16, %ymm16, %ymm16
vzeroupper
ret
.cfi_endproc
@@ -153,8 +153,7 @@ f12:
f13:
.LFB12:
.cfi_startproc
- vmovaps (%rdi), %ymm16
- vbroadcastss %xmm16, %ymm16
+ vbroadcastss (%rdi), %ymm16
vzeroupper
ret
.cfi_endproc
OK for trunk?
Thanks.
H.J.
--
gcc/
* config/i386/i386-builtin.def: Replace CODE_FOR_avx2_vec_dupv4sf,
CODE_FOR_avx2_vec_dupv8sf and CODE_FOR_avx2_vec_dupv4df with
CODE_FOR_vec_dupv4sf, CODE_FOR_vec_dupv8sf and
CODE_FOR_vec_dupv4df, respectively.
* config/i386/i386.c (expand_vec_perm_1): Use subreg with vec_dup.
* config/i386/i386.md (SF to DF splitter): Replace
gen_avx512f_vec_dupv16sf_1 with gen_avx512f_vec_dupv16sf.
* config/i386/sse.md (VF48_AVX512VL): New.
(avx2_vec_dup<mode>): Remove.
(avx2_vec_dupv8sf_1): Likewise.
(avx512f_vec_dup<mode>_1): Likewise.
(avx2_pbroadcast<mode>_1): Likewise.
(avx2_vec_dupv4df): Likewise.
(<avx512>_vec_dup<mode>_1): Likewise.
(*avx_vperm_broadcast_<mode>): Replace gen_avx2_vec_dupv8sf with
gen_vec_dupv8sf.
gcc/testsuite/
* gcc.target/i386/avx2-vbroadcastss_ps256-1.c: Update.
* gcc.target/i386/avx512vl-vbroadcast-3.c: Likewise.
---
gcc/config/i386/i386-builtin.def | 6 +-
gcc/config/i386/i386.c | 57 ++++++++++---
gcc/config/i386/i386.md | 2 +-
gcc/config/i386/sse.md | 83 +------------------
.../i386/avx2-vbroadcastss_ps256-1.c | 3 +-
.../gcc.target/i386/avx512vl-vbroadcast-3.c | 5 +-
6 files changed, 56 insertions(+), 100 deletions(-)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index df0f7e975ac..d217add8ee2 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1194,9 +1194,9 @@ BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
-BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 963c7fcbb34..6b95d774ad1 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -45963,28 +45963,41 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
/* Use vpbroadcast{b,w,d}. */
rtx (*gen) (rtx, rtx) = NULL;
+ machine_mode smode = VOIDmode;
switch (d->vmode)
{
case E_V64QImode:
if (TARGET_AVX512BW)
- gen = gen_avx512bw_vec_dupv64qi_1;
+ {
+ smode = V16QImode;
+ gen = gen_avx512bw_vec_dupv64qi;
+ }
break;
case E_V32QImode:
- gen = gen_avx2_pbroadcastv32qi_1;
+ smode = V16QImode;
+ gen = gen_avx2_pbroadcastv32qi;
break;
case E_V32HImode:
if (TARGET_AVX512BW)
- gen = gen_avx512bw_vec_dupv32hi_1;
+ {
+ smode = V8HImode;
+ gen = gen_avx512bw_vec_dupv32hi;
+ }
break;
case E_V16HImode:
- gen = gen_avx2_pbroadcastv16hi_1;
+ smode = V8HImode;
+ gen = gen_avx2_pbroadcastv16hi;
break;
case E_V16SImode:
if (TARGET_AVX512F)
- gen = gen_avx512f_vec_dupv16si_1;
+ {
+ smode = V4SImode;
+ gen = gen_avx512f_vec_dupv16si;
+ }
break;
case E_V8SImode:
- gen = gen_avx2_pbroadcastv8si_1;
+ smode = V4SImode;
+ gen = gen_avx2_pbroadcastv8si;
break;
case E_V16QImode:
gen = gen_avx2_pbroadcastv16qi;
@@ -45993,19 +46006,25 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
gen = gen_avx2_pbroadcastv8hi;
break;
case E_V16SFmode:
+ smode = SFmode;
if (TARGET_AVX512F)
- gen = gen_avx512f_vec_dupv16sf_1;
+ gen = gen_avx512f_vec_dupv16sf;
break;
case E_V8SFmode:
- gen = gen_avx2_vec_dupv8sf_1;
+ smode = SFmode;
+ gen = gen_vec_dupv8sf;
break;
case E_V8DFmode:
+ smode = DFmode;
if (TARGET_AVX512F)
- gen = gen_avx512f_vec_dupv8df_1;
+ gen = gen_avx512f_vec_dupv8df;
break;
case E_V8DImode:
if (TARGET_AVX512F)
- gen = gen_avx512f_vec_dupv8di_1;
+ {
+ smode = V2DImode;
+ gen = gen_avx512f_vec_dupv8di;
+ }
break;
/* For other modes prefer other shuffles this function creates. */
default: break;
@@ -46013,7 +46032,23 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
if (gen != NULL)
{
if (!d->testing_p)
- emit_insn (gen (d->target, d->op0));
+ {
+ if (smode == VOIDmode)
+ emit_insn (gen (d->target, d->op0));
+ else
+ {
+ rtx op = d->op0;
+ unsigned int oppos = 0;
+ if (SUBREG_P (op))
+ {
+ op = SUBREG_REG (op);
+ oppos = SUBREG_BYTE (op);
+ }
+ emit_insn (gen (d->target,
+ gen_rtx_SUBREG (smode, op,
+ oppos)));
+ }
+ }
return true;
}
}
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7fb2b144f47..4a6fa077db5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4399,7 +4399,7 @@
else
{
rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
- emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
+ emit_insn (gen_avx512f_vec_dupv16sf (tmp, tmp));
}
}
else
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee73e1fdf80..90a700c154a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7117,42 +7117,6 @@
(set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "SF")])
-(define_insn "avx2_vec_dup<mode>"
- [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
- (vec_duplicate:VF1_128_256
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_evex")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "avx2_vec_dupv8sf_1"
- [(set (match_operand:V8SF 0 "register_operand" "=v")
- (vec_duplicate:V8SF
- (vec_select:SF
- (match_operand:V8SF 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastss\t{%x1, %0|%0, %x1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_evex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "avx512f_vec_dup<mode>_1"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_duplicate:VF_512
- (vec_select:<ssescalarmode>
- (match_operand:VF_512 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX512F"
- "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<MODE>")])
-
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
@@ -17918,24 +17882,6 @@
(set_attr "prefix" "vex,evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_pbroadcast<mode>_1"
- [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
- (vec_duplicate:VI_256
- (vec_select:<ssescalarmode>
- (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "@
- vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
- vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
- vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
- vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
- [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
- (set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
[(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
(unspec:VI48F_256_512
@@ -18111,32 +18057,6 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx2_vec_dupv4df"
- [(set (match_operand:V4DF 0 "register_operand" "=v")
- (vec_duplicate:V4DF
- (vec_select:DF
- (match_operand:V2DF 1 "register_operand" "v")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastsd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_evex")
- (set_attr "mode" "V4DF")])
-
-(define_insn "<avx512>_vec_dup<mode>_1"
- [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
- (vec_duplicate:VI_AVX512BW
- (vec_select:<ssescalarmode>
- (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX512F"
- "@
- vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
- vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
- [(set_attr "type" "ssemov")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<avx512>_vec_dup<mode><mask_name>"
[(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
(vec_duplicate:V48_AVX512VL
@@ -18545,8 +18465,7 @@
or VSHUFF128. */
gcc_assert (<MODE>mode == V8SFmode);
if ((mask & 1) == 0)
- emit_insn (gen_avx2_vec_dupv8sf (op0,
- gen_lowpart (V4SFmode, op0)));
+ emit_insn (gen_vec_dupv8sf (op0, gen_lowpart (V4SFmode, op0)));
else
emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
GEN_INT (4), GEN_INT (5),
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vbroadcastss_ps256-1.c b/gcc/testsuite/gcc.target/i386/avx2-vbroadcastss_ps256-1.c
index dfac3916b08..3ff7497aa21 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-vbroadcastss_ps256-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-vbroadcastss_ps256-1.c
@@ -1,6 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mavx2 -O2" } */
-/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "vmovaps\[\t \]*\[^,\]*,%xmm\[0-9\]" } } */
#include <immintrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-3.c
index 7233398cd64..1c62364dac4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcast-3.c
@@ -151,8 +151,8 @@ f16 (V2 *x)
}
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%xmm16" 4 } } */
-/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 3 } } */
-/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%ymm16" 3 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%ymm16" 4 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$170\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
@@ -160,3 +160,4 @@ f16 (V2 *x)
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
/* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$3\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
--
2.17.2
More information about the Gcc-patches
mailing list