This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Improve avx_vec_concat<mode>
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Uros Bizjak <ubizjak at gmail dot com>, Kirill Yukhin <kirill dot yukhin at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 23 May 2016 19:26:58 +0200
- Subject: [PATCH] Improve avx_vec_concat<mode>
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
Not sure how to easily test these.
In any case, for the vinsert* case, we don't have vinserti128 or
vinsertf128 in evex, so we need to use vinsert[if]{64x4,32x4}, or,
for DQ, {64x2,32x8}. For the case with zero in the other half,
we need AVX512VL and it isn't guaranteed for the output operand,
because it can be 512-bit mode too.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2016-05-23 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (avx_vec_concat<mode>): Add v=v,vm and
Yv=Yv,C alternatives.
--- gcc/config/i386/sse.md.jj 2016-05-23 15:42:49.000000000 +0200
+++ gcc/config/i386/sse.md 2016-05-23 16:25:58.434925572 +0200
@@ -18178,10 +18178,10 @@ (define_insn "<avx2_avx512>_<shift_insn>
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx_vec_concat<mode>"
- [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
+ [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
(vec_concat:V_256_512
- (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
- (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
+ (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
+ (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,vm,C,C")))]
"TARGET_AVX"
{
switch (which_alternative)
@@ -18189,6 +18189,22 @@ (define_insn "avx_vec_concat<mode>"
case 0:
return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
case 1:
+ if (<MODE_SIZE> == 64)
+ {
+ if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
+ return "vinsert<shuffletype>32x8\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
+ else
+ return "vinsert<shuffletype>64x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
+ }
+ else
+ {
+ if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
+ return "vinsert<shuffletype>64x2\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
+ else
+ return "vinsert<shuffletype>32x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
+ }
+ case 2:
+ case 3:
switch (get_attr_mode (insn))
{
case MODE_V16SF:
@@ -18200,9 +18216,19 @@ (define_insn "avx_vec_concat<mode>"
case MODE_V4DF:
return "vmovapd\t{%1, %x0|%x0, %1}";
case MODE_XI:
- return "vmovdqa\t{%1, %t0|%t0, %1}";
+ if (which_alternative == 2)
+ return "vmovdqa\t{%1, %t0|%t0, %1}";
+ else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
+ return "vmovdqa64\t{%1, %t0|%t0, %1}";
+ else
+ return "vmovdqa32\t{%1, %t0|%t0, %1}";
case MODE_OI:
- return "vmovdqa\t{%1, %x0|%x0, %1}";
+ if (which_alternative == 2)
+ return "vmovdqa\t{%1, %x0|%x0, %1}";
+ else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
+ return "vmovdqa64\t{%1, %x0|%x0, %1}";
+ else
+ return "vmovdqa32\t{%1, %x0|%x0, %1}";
default:
gcc_unreachable ();
}
@@ -18210,9 +18236,9 @@ (define_insn "avx_vec_concat<mode>"
gcc_unreachable ();
}
}
- [(set_attr "type" "sselog,ssemov")
- (set_attr "prefix_extra" "1,*")
- (set_attr "length_immediate" "1,*")
+ [(set_attr "type" "sselog,sselog,ssemov,ssemov")
+ (set_attr "prefix_extra" "1,1,*,*")
+ (set_attr "length_immediate" "1,1,*,*")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
Jakub