This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [AVX] PATCH: Add vzeroall/vzeroupper patterns
On Mon, Apr 14, 2008 at 08:39:43PM +0200, Uros Bizjak wrote:
> H.J. Lu wrote:
>> On Mon, Apr 14, 2008 at 08:02:30AM +0200, Uros Bizjak wrote:
>>
>>> On Sun, Apr 13, 2008 at 10:58 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>
>>>
>>>> > so, xorps was removed by postreload pass, since xorps and vzeroall have
>>>> > both cleared xmm0 in V4SFmode.
>>>> >
>>>> > It looks to me, that postreload pass needs to be extended a bit due to the
>>>> > fact that const0_rtx represents zero (aka 0x0....0) in every mode. This
>>>> > way, postreload pass would remove xorps in above example even when vzeroall
>>>> > creates V4SImode zeros.
>>>>
>>>> I adjusted your patch against AVX branch. Does it look OK?
>>>>
>>> It looks OK to me, except for the mode. Please note, that currently,
>>> only SFmode xorps will be removed, since we have V8SFmode zeros. I
>>> think that vzeroall should produce zeros in a generic mode (say,
>>> V8SI). Postreload pass should be extended using MODES_TIEABLE_P to
>>> account for zeros in any mode when removing extra settings to zero.
>>>
>>>
>>>
>>
>> Hi Uros,
>>
>> This is the patch I checked in.
>>
>> BTW, do you have any suggestions for
>>
>> ;; FIXME: It clobbers the upper 128bits of AVX registers.
>> (define_insn "avx_vzeroupper"
>> [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
>> "TARGET_AVX"
>> "vzeroupper"
>> [(set_attr "type" "sse")
>> (set_attr "memory" "none")
>> (set_attr "mode" "OI")])
>>
>> It won't work correctly if any 256bit registers are used.
>>
> Perhaps using subregs:
>
> (define_expand "avx_vzeroupper"
> [(match_par_dup 0 [(const_int 0)])]
> "TARGET_SSE2"
> {
> int nregs = TARGET_64BIT ? 16 : 8;
> int regno;
>
> operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs+1));
>
> XVECEXP (operands[0], 0, 0)
> = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
> UNSPECV_VZEROUPPER);
> for (regno = 0; regno < nregs; regno++)
> XVECEXP (operands[0], 0, regno+1)
> = gen_rtx_SET (VOIDmode,
> gen_rtx_SUBREG (V2SFmode,
> gen_rtx_REG (V4SFmode, SSE_REGNO
> (regno)),
> GET_MODE_SIZE (V2SFmode)),
> CONST0_RTX (V2SFmode));
> })
>
> (define_insn "*avx_vzeroupper"
> [(match_parallel 0 "vzero_operation"
> [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
> (set (match_operand 1 "register_operand" "=x")
> (match_operand 2 "const0_operand" "X"))])]
> "TARGET_SSE2"
> "vzeroall"
> [(set_attr "type" "sse")
> (set_attr "memory" "none")
> (set_attr "mode" "TI")])
>
>
> IIRC, ix86_cannot_change_mode_class() should be fixed a bit to accept
> subregs of V8SImode for this to work.
I tried the enclosed patch. It doesn't work on
#include <gmmintrin.h>
extern __m128 bar2 (__m128, __m128);
__m128
foo3 (__m128 y)
{
__m128 x = { 0 };
_mm256_zeroupper ();
return bar2 (x, y);
}
/export/build/gnu/gcc-avx/build-x86_64-linux/gcc/include/gmmintrin.h:521:
internal compiler error: in gen_rtx_SUBREG, at emit-rtl.c:777
Please submit a full bug report,
with preprocessed source if appropriate.
See <http://gcc.gnu.org/bugs.html> for instructions.
subreg_get_info returns false on info->representable_p. This
subreg is limited to vzeroupper. It doesn't work on other AVX
instructions.
Thanks.
H.J.
----
Index: predicates.md
===================================================================
--- predicates.md (revision 134313)
+++ predicates.md (working copy)
@@ -1074,8 +1074,9 @@
(and (match_code "mem")
(match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)")))
-;; Return 1 if OP is a vzeroall operation, known to be a PARALLEL.
-(define_predicate "vzeroall_operation"
+;; Return 1 if OP is a vzeroall/vzeroupper operation, known to be a
+;; PARALLEL.
+(define_predicate "vzero_operation"
(match_code "parallel")
{
int nregs = TARGET_64BIT ? 16 : 8;
Index: sse.md
===================================================================
--- sse.md (revision 134313)
+++ sse.md (working copy)
@@ -8590,7 +8590,7 @@
})
(define_insn "*avx_vzeroall"
- [(match_parallel 0 "vzeroall_operation"
+ [(match_parallel 0 "vzero_operation"
[(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
(set (match_operand 1 "register_operand" "=x")
(match_operand 2 "const0_operand" "X"))])]
@@ -8600,11 +8600,35 @@
(set_attr "memory" "none")
(set_attr "mode" "OI")])
-;; FIXME: It clobbers the upper 128bits of AVX registers.
-(define_insn "avx_vzeroupper"
- [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
+(define_expand "avx_vzeroupper"
+ [(match_par_dup 0 [(const_int 0)])]
"TARGET_AVX"
- "vzeroupper"
+{
+ int nregs = TARGET_64BIT ? 16 : 8;
+ int regno;
+
+ operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
+
+ XVECEXP (operands[0], 0, 0)
+ = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPECV_VZEROUPPER);
+ for (regno = 0; regno < nregs; regno++)
+ XVECEXP (operands[0], 0, regno+1)
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_SUBREG (V4SImode,
+ gen_rtx_REG (V8SImode,
+ SSE_REGNO (regno)),
+ GET_MODE_SIZE (V4SImode)),
+ CONST0_RTX (V4SImode));
+})
+
+(define_insn "*avx_vzeroupper"
+ [(match_parallel 0 "vzero_operation"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
+ (set (match_operand 1 "register_operand" "=x")
+ (match_operand 2 "const0_operand" "X"))])]
+ "TARGET_AVX"
+ "vzeroall"
[(set_attr "type" "sse")
(set_attr "memory" "none")
(set_attr "mode" "OI")])
Index: i386.c
===================================================================
--- i386.c (revision 134313)
+++ i386.c (working copy)
@@ -22521,6 +22521,11 @@ ix86_cannot_change_mode_class (enum mach
if (GET_MODE_SIZE (from) < 4)
return true;
+ /* AVX's vzeroupper supports subreg from V8SImode to V4SImode
+ with nonzero offset. */
+ if (from == V8SImode && to == V4SImode)
+ return false;
+
/* Vector registers do not support subreg with nonzero offsets, which
are otherwise valid for integer registers. Since we can't see
whether we have a nonzero offset from here, prohibit all