[PATCH] Fix vec_extract_lo_* patterns (PR target/81225)
Jakub Jelinek
jakub@redhat.com
Thu Jun 29 16:51:00 GMT 2017
Hi!
This patch fixes various issues with the vec_extract_lo_* patterns.
There are splitters for these, but only for some cases (no mask, and
in one case also not xmm16+ reg) that change those into just a copy or load
of the low part subreg, but if those can't be used, the vextract* insns
don't accept memory input operand, but 3 of the 4 patterns have
nonimmediate_operand input, which is wrong for the masked case, and the
other one uses register_operand, even when the splitter can handle
nonimmediate_operand when not masked.
Thus this patch makes sure that the input is nonimmediate_operand and v,vm
if not masked and register_operand and v,v if masked, returns "#" to ensure
splitting in cases where the input is in memory, simplifies the conditions (for
masked we don't need to test at runtime if both arguments aren't MEMs,
because the predicate is now register_operand with v constraint), and
changes the single case that used register_operand to follow the rest.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2017-06-29 Jakub Jelinek <jakub@redhat.com>
PR target/81225
* config/i386/sse.md (vec_extract_lo_<mode><mask_name>): For
V8FI, V16FI and VI8F_256 iterators, use <store_mask_predicate> instead
of nonimmediate_operand and <store_mask_constraint> instead of m for
the input operand. For V8FI iterator, always split if input is a MEM.
For V16FI and VI8F_256 iterators, don't test if both operands are MEM
if <mask_applied>. For VI4F_256 iterator, use <store_mask_predicate>
instead of register_operand and <store_mask_constraint> instead of v for
the input operand. Make sure both operands aren't MEMs if not
<mask_applied>.
* gcc.target/i386/pr81225.c: New test.
--- gcc/config/i386/sse.md.jj 2017-06-21 22:01:41.000000000 +0200
+++ gcc/config/i386/sse.md 2017-06-28 12:30:49.304820307 +0200
@@ -7359,13 +7359,13 @@ (define_insn "vec_extract_lo_<mode>_mask
(define_insn "vec_extract_lo_<mode><mask_name>"
[(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
(vec_select:<ssehalfvecmode>
- (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
+ (match_operand:V8FI 1 "<store_mask_predicate>" "v,<store_mask_constraint>")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
"TARGET_AVX512F
&& (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
- if (<mask_applied> || !TARGET_AVX512VL)
+ if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
else
return "#";
@@ -7515,14 +7515,15 @@ (define_expand "avx_vextractf128<mode>"
(define_insn "vec_extract_lo_<mode><mask_name>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
(vec_select:<ssehalfvecmode>
- (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
+ (match_operand:V16FI 1 "<store_mask_predicate>"
+ "<store_mask_constraint>,v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
"TARGET_AVX512F
&& <mask_mode512bit_condition>
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
if (<mask_applied>)
return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
@@ -7546,11 +7547,12 @@ (define_split
(define_insn "vec_extract_lo_<mode><mask_name>"
[(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
+ (match_operand:VI8F_256 1 "<store_mask_predicate>"
+ "<store_mask_constraint>,v")
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_AVX
&& <mask_avx512vl_condition> && <mask_avx512dq_condition>
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
if (<mask_applied>)
return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
@@ -7610,12 +7612,16 @@ (define_split
"operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
(define_insn "vec_extract_lo_<mode><mask_name>"
- [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
+ "=<store_mask_constraint>,v")
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "register_operand" "v")
+ (match_operand:VI4F_256 1 "<store_mask_predicate>"
+ "v,<store_mask_constraint>")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
+ "TARGET_AVX
+ && <mask_avx512vl_condition> && <mask_avx512dq_condition>
+ && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
if (<mask_applied>)
return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
--- gcc/testsuite/gcc.target/i386/pr81225.c.jj 2017-06-28 12:51:10.606338225 +0200
+++ gcc/testsuite/gcc.target/i386/pr81225.c 2017-06-28 12:50:52.000000000 +0200
@@ -0,0 +1,14 @@
+/* PR target/81225 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512ifma -O3 -ffloat-store" } */
+
+long a[24];
+float b[4], c[24];
+int d;
+
+void
+foo ()
+{
+ for (d = 0; d < 24; d++)
+ c[d] = (float) d ? : b[a[d]];
+}
Jakub
More information about the Gcc-patches
mailing list