This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [AVX] PATCH: Add vzeroall/vzeroupper patterns


On Mon, Apr 14, 2008 at 08:39:43PM +0200, Uros Bizjak wrote:
> H.J. Lu wrote:
>> On Mon, Apr 14, 2008 at 08:02:30AM +0200, Uros Bizjak wrote:
>>   
>>> On Sun, Apr 13, 2008 at 10:58 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>
>>>     
>>>>  > so, xorps was removed by postreload pass, since xorps and vzeroall have
>>>>  > both cleared xmm0 in V4SFmode.
>>>>  >
>>>>  > It looks to me, that postreload pass needs to be extended a bit due to the
>>>>  > fact that const0_rtx represents zero (aka 0x0....0) in every mode. This
>>>>  > way, postreload pass would remove xorps in above example even when vzeroall
>>>>  > creates V4SImode zeros.
>>>>
>>>>  I adjusted your patch against AVX branch. Does it look OK?
>>>>       
>>> It looks OK to me, except for the mode. Please note, that currently,
>>> only SFmode xorps will be removed, since we have V8SFmode zeros. I
>>> think that vzeroall should produce zeros in a generic mode (say,
>>> V8SI). Postreload pass should be extended using MODES_TIEABLE_P to
>>> account for zeros in any mode when removing extra settings to zero.
>>>
>>>
>>>     
>>
>> Hi Uros,
>>
>> This is the patch I checked in.
>>
>> BTW, do you have any suggestions for
>>
>> ;; FIXME: It clobbers the upper 128bits of AVX registers.
>> (define_insn "avx_vzeroupper"
>>   [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
>>   "TARGET_AVX"
>>   "vzeroupper"
>>   [(set_attr "type" "sse")
>>    (set_attr "memory" "none")
>>    (set_attr "mode" "OI")])
>>
>> It won't work correctly if any 256bit registers are used.
>>   
> Perhaps using subregs:
>
> (define_expand "avx_vzeroupper"
>  [(match_par_dup 0 [(const_int 0)])]
>  "TARGET_SSE2"
> {
>  int nregs = TARGET_64BIT ? 16 : 8;
>  int regno;
>
>  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs+1));
>
>  XVECEXP (operands[0], 0, 0)
>   = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
>                              UNSPECV_VZEROUPPER);
>  for (regno = 0; regno < nregs; regno++)
>    XVECEXP (operands[0], 0, regno+1)
>     = gen_rtx_SET (VOIDmode,
>                    gen_rtx_SUBREG (V2SFmode,
>                                    gen_rtx_REG (V4SFmode, SSE_REGNO 
> (regno)),
>                                    GET_MODE_SIZE (V2SFmode)),
>                    CONST0_RTX (V2SFmode));
> })
>
> (define_insn "*avx_vzeroupper"
>  [(match_parallel 0 "vzero_operation"
>    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
>     (set (match_operand 1 "register_operand" "=x")
>          (match_operand 2 "const0_operand" "X"))])]
>  "TARGET_SSE2"
>  "vzeroall"
>  [(set_attr "type" "sse")
>   (set_attr "memory" "none")
>   (set_attr "mode" "TI")])
>
>
> IIRC, ix86_cannot_change_mode_class() should be fixed a bit to accept 
> subregs of V8SImode for this to work.

I tried the enclosed patch. It doesn't work on

#include <gmmintrin.h>

extern __m128 bar2 (__m128, __m128);

__m128
foo3 (__m128 y)
{
  __m128 x = { 0 };
  _mm256_zeroupper ();
  return bar2 (x, y);
}

/export/build/gnu/gcc-avx/build-x86_64-linux/gcc/include/gmmintrin.h:521:
internal compiler error: in gen_rtx_SUBREG, at emit-rtl.c:777
Please submit a full bug report,
with preprocessed source if appropriate.
See <http://gcc.gnu.org/bugs.html> for instructions.

subreg_get_info returns false on info->representable_p. This
subreg is limited to vzeroupper. It doesn't work on other AVX
instructions.

Thanks.

H.J.
----
Index: predicates.md
===================================================================
--- predicates.md	(revision 134313)
+++ predicates.md	(working copy)
@@ -1074,8 +1074,9 @@
   (and (match_code "mem")
        (match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)")))
 
-;; Return 1 if OP is a vzeroall operation, known to be a PARALLEL.
-(define_predicate "vzeroall_operation"
+;; Return 1 if OP is a vzeroall/vzeroupper operation, known to be a
+;; PARALLEL.
+(define_predicate "vzero_operation"
   (match_code "parallel")
 {
   int nregs = TARGET_64BIT ? 16 : 8;
Index: sse.md
===================================================================
--- sse.md	(revision 134313)
+++ sse.md	(working copy)
@@ -8590,7 +8590,7 @@
 })
 
 (define_insn "*avx_vzeroall"
-  [(match_parallel 0 "vzeroall_operation"
+  [(match_parallel 0 "vzero_operation"
     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
      (set (match_operand 1 "register_operand" "=x")
           (match_operand 2 "const0_operand" "X"))])]
@@ -8600,11 +8600,35 @@
    (set_attr "memory" "none")
    (set_attr "mode" "OI")])
 
-;; FIXME: It clobbers the upper 128bits of AVX registers.
-(define_insn "avx_vzeroupper"
-  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
+(define_expand "avx_vzeroupper"
+  [(match_par_dup 0 [(const_int 0)])]
   "TARGET_AVX"
-  "vzeroupper"
+{
+  int nregs = TARGET_64BIT ? 16 : 8;
+  int regno;
+
+  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
+
+  XVECEXP (operands[0], 0, 0)
+    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+			       UNSPECV_VZEROUPPER);
+  for (regno = 0; regno < nregs; regno++)
+    XVECEXP (operands[0], 0, regno+1)
+      = gen_rtx_SET (VOIDmode,
+		     gen_rtx_SUBREG (V4SImode,
+                                     gen_rtx_REG (V8SImode,
+						  SSE_REGNO (regno)),
+				     GET_MODE_SIZE (V4SImode)),
+		     CONST0_RTX (V4SImode));
+})
+
+(define_insn "*avx_vzeroupper"
+  [(match_parallel 0 "vzero_operation"
+    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
+     (set (match_operand 1 "register_operand" "=x")
+          (match_operand 2 "const0_operand" "X"))])]
+  "TARGET_AVX"
+  "vzeroall"
   [(set_attr "type" "sse")
    (set_attr "memory" "none")
    (set_attr "mode" "OI")])
Index: i386.c
===================================================================
--- i386.c	(revision 134313)
+++ i386.c	(working copy)
@@ -22521,6 +22521,11 @@ ix86_cannot_change_mode_class (enum mach
       if (GET_MODE_SIZE (from) < 4)
 	return true;
 
+      /* AVX's vzeroupper supports subreg from V8SImode to V4SImode
+	 with nonzero offset.  */
+      if (from == V8SImode && to == V4SImode)
+	return false;
+
       /* Vector registers do not support subreg with nonzero offsets, which
 	 are otherwise valid for integer registers.  Since we can't see
 	 whether we have a nonzero offset from here, prohibit all


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]