[PATCH i386 AVX512] [22/n] Extend unaligned loads & stores.


Hello,
This patch extends the unaligned load and store patterns.

I've refactored the original patch (stored on the SVN branch)
to reduce the complexity of the conditions in
   define_insn "<avx512>_storedqu<mode>_mask"

It seems that the same trick won't work for:
   <sse2_avx_avx512f>_loaddqu<mode><mask_name>
The problem is the V32QI and V16QI modes, which are enabled for
SSE/AVX without masking but require AVX-512BW & AVX-512VL when
masking is on.
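
For illustration (operands here are made up), the unmasked byte load
has a plain SSE/AVX encoding:

   vmovdqu	(%rax), %xmm1

while the masked byte form exists only as an EVEX encoding, whose
128/256-bit variants require both AVX-512BW and AVX-512VL:

   vmovdqu8	(%rax), %xmm1{%k1}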

Of course, I could split the define_insn & define_expand
into 3 patterns with mode iterators of:
  1. V16QI, V32QI - baseline is SSE2, masks enabled for AVX-512BW & VL
  2. V64QI, V8HI, V16HI, V32HI - baseline is AVX-512BW, masks enabled
     for AVX-512VL
  3. V8DI, V4DI, V2DI, V16SI, V8SI, V4SI - baseline is AVX-512F, masks
     enabled for AVX-512VL.

But such an approach would lead to 6 patterns instead of 2 (each with
non-trivial asm emission); a rough sketch of the iterators is below.
I have doubts about whether it is useful...
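
For reference, a sketch of the iterators such a split might use
(names are hypothetical and not part of this patch):

   ;; 1. Baseline SSE2; masking would be gated on AVX-512BW & AVX-512VL.
   (define_mode_iterator VI1_UNALIGNED_LOADSTORE
     [(V32QI "TARGET_AVX") V16QI])

   ;; 2. Baseline AVX-512BW; 128/256-bit modes additionally need AVX-512VL.
   (define_mode_iterator VI12_AVX512_UNALIGNED_LOADSTORE
     [V64QI V32HI
      (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])

   ;; 3. Baseline AVX-512F; 128/256-bit modes additionally need AVX-512VL.
   (define_mode_iterator VI48_AVX512_UNALIGNED_LOADSTORE
     [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
      V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])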


The current patch passes bootstrap and shows no regressions under the
simulator.

What do you think?

gcc/
	* config/i386/sse.md
	(define_mode_iterator VI48_AVX512VL): New.
	(define_mode_iterator VI_UNALIGNED_LOADSTORE): Add V64QI, V32HI, V16HI,
	V8HI, V8SI, V4SI, V4DI, V2DI modes.
	(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"): Update
	condition.
	(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"): Update
	condition, handle new modes.
	(define_insn "<sse2_avx_avx512f>_storedqu<mode>"): Handle new modes.
	(define_insn "avx512f_storedqu<mode>_mask"): Delete.
	(define_insn "<avx512>_storedqu<mode>_mask" with
	VI48_AVX512VL): New.
	(define_insn "<avx512>_storedqu<mode>_mask" with
	VI12_AVX512VL): Ditto.

--
Thanks, K


diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index cd0c08e..51cfada 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -235,6 +235,10 @@
 (define_mode_iterator VF_512
   [V16SF V8DF])
 
+(define_mode_iterator VI48_AVX512VL
+  [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
+   V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
+
 (define_mode_iterator VF2_AVX512VL
   [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
 
@@ -260,8 +264,12 @@
   [(V32QI "TARGET_AVX") V16QI])
 
 (define_mode_iterator VI_UNALIGNED_LOADSTORE
-  [(V32QI "TARGET_AVX") V16QI
-   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+   (V32HI "TARGET_AVX512BW")
+   (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
 
 ;; All DImode vector integer modes
 (define_mode_iterator VI8
@@ -1172,7 +1180,10 @@
 	(unspec:VI_UNALIGNED_LOADSTORE
 	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2 
+   && (!<mask_applied>
+       || (TARGET_AVX512BW && TARGET_AVX512VL)
+       || (<MODE>mode != V32QImode && <MODE>mode != V16QImode))"
 {
   /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
      just fine if misaligned_operand is true, and without the UNSPEC it can
@@ -1197,20 +1208,27 @@
 	(unspec:VI_UNALIGNED_LOADSTORE
 	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
 	  UNSPEC_LOADU))]
-  "TARGET_SSE2 && <mask_mode512bit_condition>"
+  "TARGET_SSE2
+   && (!<mask_applied>
+       || (TARGET_AVX512BW && TARGET_AVX512VL)
+       || (<MODE>mode != V32QImode && <MODE>mode != V16QImode))"
 {
   switch (get_attr_mode (insn))
     {
+    case MODE_V16SF:
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
-      else
-	return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      switch (<MODE>mode)
+      {
+      case V32QImode:
+      case V16QImode:
+	if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	  return "%vmovdqu\t{%1, %0|%0, %1}";
+      default:
+	return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+      }
     }
 }
   [(set_attr "type" "ssemov")
@@ -1246,13 +1264,16 @@
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
-    case MODE_XI:
-      if (<MODE>mode == V8DImode)
-	return "vmovdqu64\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqu32\t{%1, %0|%0, %1}";
     default:
-      return "%vmovdqu\t{%1, %0|%0, %1}";
+      switch (<MODE>mode)
+      {
+      case V32QImode:
+      case V16QImode:
+	if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	  return "%vmovdqu\t{%1, %0|%0, %1}";
+      default:
+	return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
+      }
     }
 }
   [(set_attr "type" "ssemov")
@@ -1276,21 +1297,32 @@
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
-(define_insn "avx512f_storedqu<mode>_mask"
-  [(set (match_operand:VI48_512 0 "memory_operand" "=m")
-	(vec_merge:VI48_512
-	  (unspec:VI48_512
-	    [(match_operand:VI48_512 1 "register_operand" "v")]
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI48_AVX512VL
+	  (unspec:VI48_AVX512VL
+	    [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
 	    UNSPEC_STOREU)
 	  (match_dup 0)
 	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
   "TARGET_AVX512F"
-{
-  if (<MODE>mode == V8DImode)
-    return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-  else
-    return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-}
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "memory" "store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_storedqu<mode>_mask"
+  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
+	(vec_merge:VI12_AVX512VL
+	  (unspec:VI12_AVX512VL
+	    [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
+	    UNSPEC_STOREU)
+	  (match_dup 0)
+	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
+  "TARGET_AVX512BW"
+  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "memory" "store")

