This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [AVX] PATCH: Add vzeroall/vzeroupper patterns


On Mon, Apr 14, 2008 at 08:02:30AM +0200, Uros Bizjak wrote:
> On Sun, Apr 13, 2008 at 10:58 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> 
> >  > so, xorps was removed by postreload pass, since xorps and vzeroall have
> >  > both cleared xmm0 in V4SFmode.
> >  >
> >  > It looks to me, that postreload pass needs to be extended a bit due to the
> >  > fact that const0_rtx represents zero (aka 0x0....0) in every mode. This
> >  > way, postreload pass would remove xorps in above example even when vzeroall
> >  > creates V4SImode zeros.
> >
> >  I adjusted your patch against AVX branch. Does it look OK?
> 
> It looks OK to me, except for the mode. Please note, that currently,
> only SFmode xorps will be removed, since we have V8SFmode zeros. I
> think that vzeroall should produce zeros in a generic mode (say,
> V8SI). Postreload pass should be extended using MODES_TIEABLE_P to
> account for zeros in any mode when removing extra settings to zero.
> 
> 

Hi Uros,

This is the patch I checked in.

BTW, do you have any suggestions for

;; FIXME: It clobbers the upper 128bits of AVX registers.
(define_insn "avx_vzeroupper"
  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "memory" "none")
   (set_attr "mode" "OI")])

It won't work correctly if any 256bit registers are used.

Thanks.

H.J.
----
2008-04-14  Uros Bizjak  <ubizjak@gmail.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.md (XMM0_REG...XMM15_REG): Removed.
	* config/i386/sse.md (avx_vzeroall_rex64): Likewise.
	* config/i386/i386.c (ix86_expand_builtin): Updated.

2008-04-14  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/predicates.md (vzeroall_operation): New.

	* config/i386/sse.md (avx_vzeroall): Rewrite to use define_expand.
	(*avx_vzeroall): New.

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 134221)
+++ config/i386/i386.md	(working copy)
@@ -244,24 +244,8 @@
    (FLAGS_REG			17)
    (FPSR_REG			18)
    (FPCR_REG			19)
-   (XMM0_REG			21)
-   (XMM1_REG			22)
-   (XMM2_REG			23)
-   (XMM3_REG			24)
-   (XMM4_REG			25)
-   (XMM5_REG			26)
-   (XMM6_REG			27)
-   (XMM7_REG			28)
    (R10_REG			39)
    (R11_REG			40)
-   (XMM8_REG			45)
-   (XMM9_REG			46)
-   (XMM10_REG			47)
-   (XMM11_REG			48)
-   (XMM12_REG			49)
-   (XMM13_REG			50)
-   (XMM14_REG			51)
-   (XMM15_REG			52)
   ])
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
Index: config/i386/predicates.md
===================================================================
--- config/i386/predicates.md	(revision 134221)
+++ config/i386/predicates.md	(working copy)
@@ -1073,3 +1073,15 @@
 (define_predicate "misaligned_operand"
   (and (match_code "mem")
        (match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)")))
+
+;; Return 1 if OP is a vzeroall operation, known to be a PARALLEL.
+(define_predicate "vzeroall_operation"
+  (match_code "parallel")
+{
+  int nregs = TARGET_64BIT ? 16 : 8;
+
+  if (XVECLEN (op, 0) != nregs + 1)
+    return 0;
+
+  return 1;
+})
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 134221)
+++ config/i386/sse.md	(working copy)
@@ -8569,41 +8569,32 @@
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "avx_vzeroall"
-  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
-   (clobber (reg:V8SI XMM0_REG))
-   (clobber (reg:V8SI XMM1_REG))
-   (clobber (reg:V8SI XMM2_REG))
-   (clobber (reg:V8SI XMM3_REG))
-   (clobber (reg:V8SI XMM4_REG))
-   (clobber (reg:V8SI XMM5_REG))
-   (clobber (reg:V8SI XMM6_REG))
-   (clobber (reg:V8SI XMM7_REG))]
-  "TARGET_AVX && !TARGET_64BIT"
-  "vzeroall"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "none")
-   (set_attr "mode" "OI")])
+(define_expand "avx_vzeroall"
+  [(match_par_dup 0 [(const_int 0)])]
+  "TARGET_AVX"
+{
+  int nregs = TARGET_64BIT ? 16 : 8;
+  int regno;
 
-(define_insn "avx_vzeroall_rex64"
-  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
-   (clobber (reg:V8SI XMM0_REG))
-   (clobber (reg:V8SI XMM1_REG))
-   (clobber (reg:V8SI XMM2_REG))
-   (clobber (reg:V8SI XMM3_REG))
-   (clobber (reg:V8SI XMM4_REG))
-   (clobber (reg:V8SI XMM5_REG))
-   (clobber (reg:V8SI XMM6_REG))
-   (clobber (reg:V8SI XMM7_REG))
-   (clobber (reg:V8SI XMM8_REG))
-   (clobber (reg:V8SI XMM9_REG))
-   (clobber (reg:V8SI XMM10_REG))
-   (clobber (reg:V8SI XMM11_REG))
-   (clobber (reg:V8SI XMM12_REG))
-   (clobber (reg:V8SI XMM13_REG))
-   (clobber (reg:V8SI XMM14_REG))
-   (clobber (reg:V8SI XMM15_REG))]
-  "TARGET_AVX && TARGET_64BIT"
+  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
+
+  XVECEXP (operands[0], 0, 0)
+    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+			       UNSPECV_VZEROALL);
+
+  for (regno = 0; regno < nregs; regno++)
+    XVECEXP (operands[0], 0, regno + 1)
+      = gen_rtx_SET (VOIDmode,
+		     gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
+		     CONST0_RTX (V8SImode));
+})
+
+(define_insn "*avx_vzeroall"
+  [(match_parallel 0 "vzeroall_operation"
+    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
+     (set (match_operand 1 "register_operand" "=x")
+          (match_operand 2 "const0_operand" "X"))])]
+  "TARGET_AVX"
   "vzeroall"
   [(set_attr "type" "sse")
    (set_attr "memory" "none")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 134221)
+++ config/i386/i386.c	(working copy)
@@ -21225,10 +21225,7 @@ ix86_expand_builtin (tree exp, rtx targe
   switch (fcode)
     {
     case IX86_BUILTIN_VZEROALL:
-      if (TARGET_64BIT)
-	emit_insn (gen_avx_vzeroall_rex64 ());
-      else
-	emit_insn (gen_avx_vzeroall ());
+      emit_insn (gen_avx_vzeroall ());
       return 0;
 
     case IX86_BUILTIN_VZEROUPPER:


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]